mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 19:50:17 +00:00
gguf : init
This commit is contained in:
parent
5488fb789e
commit
4d698495ea
1
.gitignore
vendored
1
.gitignore
vendored
@ -45,6 +45,7 @@ models-mnt
|
||||
/server
|
||||
/Pipfile
|
||||
/embd-input-test
|
||||
/gguf
|
||||
/libllama.so
|
||||
build-info.h
|
||||
arm_neon.h
|
||||
|
7
Makefile
7
Makefile
@ -1,5 +1,5 @@
|
||||
# Define the default target now so that it is always the first target
|
||||
BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch simple server embd-input-test
|
||||
BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch simple server embd-input-test gguf
|
||||
|
||||
# Binaries only useful for tests
|
||||
TEST_TARGETS = tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0
|
||||
@ -330,7 +330,7 @@ libllama.so: llama.o ggml.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
|
||||
|
||||
clean:
|
||||
rm -vf *.o *.so *.dll main quantize quantize-stats perplexity embedding benchmark-matmult save-load-state server simple vdot train-text-from-scratch embd-input-test build-info.h $(TEST_TARGETS)
|
||||
rm -vf *.o *.so *.dll main quantize quantize-stats perplexity embedding benchmark-matmult save-load-state server simple vdot train-text-from-scratch embd-input-test gguf build-info.h $(TEST_TARGETS)
|
||||
|
||||
#
|
||||
# Examples
|
||||
@ -370,6 +370,9 @@ $(LIB_PRE)embdinput$(DSO_EXT): examples/embd-input/embd-input.h examples/embd-in
|
||||
embd-input-test: $(LIB_PRE)embdinput$(DSO_EXT) examples/embd-input/embd-input-test.cpp build-info.h ggml.o llama.o common.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %$(DSO_EXT),$(filter-out %.h,$(filter-out %.hpp,$^))) -o $@ $(LDFLAGS) -L. -lembdinput
|
||||
|
||||
gguf: examples/gguf/gguf.cpp build-info.h ggml.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||
|
||||
train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp build-info.h ggml.o llama.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||
|
||||
|
34
examples/gguf/gguf.cpp
Normal file
34
examples/gguf/gguf.cpp
Normal file
@ -0,0 +1,34 @@
|
||||
#include "ggml.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
|
||||
// Write a gguf file.
//
// NOTE: placeholder - nothing is written yet and the call always reports success.
//
// fname   - path of the gguf file (not used yet)
// returns - true on success (currently always true)
bool gguf_write(const std::string & fname) {
    (void) fname; // intentionally unused until the writer is implemented (avoids -Wunused-parameter)

    return true;
}
|
||||
|
||||
// Read a gguf file.
//
// NOTE: placeholder - nothing is read yet and the call always reports success.
//
// fname   - path of the gguf file (not used yet)
// returns - true on success (currently always true)
bool gguf_read(const std::string & fname) {
    (void) fname; // intentionally unused until the reader is implemented (avoids -Wunused-parameter)

    return true;
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
if (argc < 3) {
|
||||
fprintf(stdout, "usage: %s data.gguf r|w\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const std::string fname(argv[1]);
|
||||
const std::string mode(argv[2]);
|
||||
|
||||
GGML_ASSERT((mode == "r" || mode == "w") && "mode must be r or w");
|
||||
|
||||
if (mode == "w") {
|
||||
GGML_ASSERT(gguf_write(fname) && "failed to write gguf file");
|
||||
} else if (mode == "r") {
|
||||
GGML_ASSERT(gguf_read(fname) && "failed to read gguf file");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
25
ggml.h
25
ggml.h
@ -190,6 +190,9 @@
|
||||
#define GGML_FILE_MAGIC 0x67676d6c // "ggml"
|
||||
#define GGML_FILE_VERSION 1
|
||||
|
||||
// magic number identifying a gguf file
// NOTE(review): the hex digits spell "GGUF" most-significant byte first; if the value is
//               written as a raw 32-bit integer on a little-endian machine the bytes on
//               disk would read "FUGG" - confirm the intended byte order
#define GGUF_FILE_MAGIC 0x47475546 // "GGUF"
|
||||
// version of the gguf file format - presumably bumped on format changes, TODO confirm
#define GGUF_FILE_VERSION 1
|
||||
|
||||
#define GGML_QNT_VERSION 2 // bump this on quantization format changes
|
||||
#define GGML_QNT_VERSION_FACTOR 1000 // do not change this
|
||||
|
||||
@ -202,7 +205,6 @@
|
||||
#define GGML_MAX_OP_PARAMS 32
|
||||
#define GGML_DEFAULT_N_THREADS 4
|
||||
|
||||
|
||||
#define GGML_EXIT_SUCCESS 0
|
||||
#define GGML_EXIT_ABORTED 1
|
||||
|
||||
@ -1611,6 +1613,27 @@ extern "C" {
|
||||
|
||||
GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);
|
||||
|
||||
//
|
||||
// gguf
|
||||
//
|
||||
|
||||
// type tag of a gguf metadata value
// every enumerator has an explicitly assigned id (0..9)
// NOTE(review): presumably these ids are stored in gguf files and must therefore stay
//               stable once released - confirm against the gguf format description
enum gguf_metadata_value_type {
|
||||
// fixed-width integer types (unsigned/signed, 8 to 32 bits)
GGUF_METADATA_VALUE_TYPE_UINT8 = 0,
|
||||
GGUF_METADATA_VALUE_TYPE_INT8 = 1,
|
||||
GGUF_METADATA_VALUE_TYPE_UINT16 = 2,
|
||||
GGUF_METADATA_VALUE_TYPE_INT16 = 3,
|
||||
GGUF_METADATA_VALUE_TYPE_UINT32 = 4,
|
||||
GGUF_METADATA_VALUE_TYPE_INT32 = 5,
|
||||
// 32-bit floating point
GGUF_METADATA_VALUE_TYPE_FLOAT32 = 6,
|
||||
// boolean
GGUF_METADATA_VALUE_TYPE_BOOL = 7,
|
||||
// string - presumably represented by struct gguf_string, TODO confirm
GGUF_METADATA_VALUE_TYPE_STRING = 8,
|
||||
// array of values - element type and count encoding are not visible here
GGUF_METADATA_VALUE_TYPE_ARRAY = 9,
|
||||
};
|
||||
|
||||
// string used by gguf: a 32-bit count together with a pointer to the characters
struct gguf_string {
|
||||
// presumably the length of `data` in bytes - TODO confirm
uint32_t n;
|
||||
// character data
// NOTE(review): ownership and null-termination are not specified here - confirm
char * data;
|
||||
};
|
||||
//
|
||||
// system info
|
||||
//
|
||||
|
Loading…
Reference in New Issue
Block a user