mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 02:44:36 +00:00
build : enable more non-default compiler warnings (#3200)
This commit is contained in:
parent
0ccfc62a96
commit
bc39553c90
1
.gitignore
vendored
1
.gitignore
vendored
@ -45,6 +45,7 @@ models-mnt
|
|||||||
/main
|
/main
|
||||||
/metal
|
/metal
|
||||||
/perplexity
|
/perplexity
|
||||||
|
/q8dot
|
||||||
/quantize
|
/quantize
|
||||||
/quantize-stats
|
/quantize-stats
|
||||||
/result
|
/result
|
||||||
|
@ -414,37 +414,38 @@ endif()
|
|||||||
|
|
||||||
if (LLAMA_ALL_WARNINGS)
|
if (LLAMA_ALL_WARNINGS)
|
||||||
if (NOT MSVC)
|
if (NOT MSVC)
|
||||||
set(c_flags
|
set(warning_flags -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
|
||||||
-Wall
|
set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int
|
||||||
-Wextra
|
-Werror=implicit-function-declaration)
|
||||||
-Wpedantic
|
set(cxx_flags -Wmissing-declarations -Wmissing-noreturn)
|
||||||
-Wcast-qual
|
|
||||||
-Wdouble-promotion
|
if (CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||||
-Wshadow
|
set(warning_flags ${warning_flags} -Wunreachable-code-break -Wunreachable-code-return)
|
||||||
-Wstrict-prototypes
|
set(cxx_flags ${cxx_flags} -Wmissing-prototypes -Wextra-semi)
|
||||||
-Wpointer-arith
|
|
||||||
-Wmissing-prototypes
|
if (
|
||||||
-Werror=implicit-int
|
(CMAKE_C_COMPILER_ID STREQUAL "Clang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 3.8.0) OR
|
||||||
-Wno-unused-function
|
(CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 7.3.0)
|
||||||
)
|
)
|
||||||
set(cxx_flags
|
set(c_flags ${c_flags} -Wdouble-promotion)
|
||||||
-Wall
|
endif()
|
||||||
-Wextra
|
elseif (CMAKE_C_COMPILER_ID STREQUAL "GNU")
|
||||||
-Wpedantic
|
set(c_flags ${c_flags} -Wdouble-promotion)
|
||||||
-Wcast-qual
|
set(cxx_flags ${cxx_flags} -Wno-array-bounds)
|
||||||
-Wmissing-declarations
|
|
||||||
-Wno-unused-function
|
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 7.1.0)
|
||||||
-Wno-multichar
|
set(cxx_flags ${cxx_flags} -Wno-format-truncation)
|
||||||
)
|
endif()
|
||||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.1.0)
|
||||||
# g++ only
|
set(cxx_flags ${cxx_flags} -Wextra-semi)
|
||||||
set(cxx_flags ${cxx_flags} -Wno-format-truncation -Wno-array-bounds)
|
endif()
|
||||||
endif()
|
endif()
|
||||||
else()
|
else()
|
||||||
# todo : msvc
|
# todo : msvc
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_compile_options(
|
add_compile_options(
|
||||||
|
${warning_flags}
|
||||||
"$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
|
"$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
|
||||||
"$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
|
"$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
|
||||||
)
|
)
|
||||||
|
69
Makefile
69
Makefile
@ -1,5 +1,5 @@
|
|||||||
# Define the default target now so that it is always the first target
|
# Define the default target now so that it is always the first target
|
||||||
BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch convert-llama2c-to-ggml simple batched save-load-state server embd-input-test gguf llama-bench baby-llama beam-search speculative parallel finetune export-lora tests/test-c.o
|
BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml simple batched save-load-state server embd-input-test gguf llama-bench baby-llama beam-search speculative benchmark-matmult parallel finetune export-lora tests/test-c.o
|
||||||
|
|
||||||
# Binaries only useful for tests
|
# Binaries only useful for tests
|
||||||
TEST_TARGETS = tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama
|
TEST_TARGETS = tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama
|
||||||
@ -19,6 +19,20 @@ ifndef UNAME_M
|
|||||||
UNAME_M := $(shell uname -m)
|
UNAME_M := $(shell uname -m)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq '' '$(findstring clang,$(shell $(CC) --version))'
|
||||||
|
CC_IS_GCC=1
|
||||||
|
CC_VER := $(shell $(CC) -dumpfullversion -dumpversion | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
|
||||||
|
else
|
||||||
|
CC_IS_CLANG=1
|
||||||
|
ifeq '' '$(findstring Apple LLVM,$(shell $(CC) --version))'
|
||||||
|
CC_IS_LLVM_CLANG=1
|
||||||
|
else
|
||||||
|
CC_IS_APPLE_CLANG=1
|
||||||
|
endif
|
||||||
|
CC_VER := $(shell $(CC) --version | sed -n 's/^.* version \([0-9.]*\).*$$/\1/p' \
|
||||||
|
| awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
|
||||||
|
endif
|
||||||
|
|
||||||
# Mac OS + Arm can report x86_64
|
# Mac OS + Arm can report x86_64
|
||||||
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
|
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
|
||||||
ifeq ($(UNAME_S),Darwin)
|
ifeq ($(UNAME_S),Darwin)
|
||||||
@ -87,9 +101,6 @@ CC := riscv64-unknown-linux-gnu-gcc
|
|||||||
CXX := riscv64-unknown-linux-gnu-g++
|
CXX := riscv64-unknown-linux-gnu-g++
|
||||||
endif
|
endif
|
||||||
|
|
||||||
CCV := $(shell $(CC) --version | head -n 1)
|
|
||||||
CXXV := $(shell $(CXX) --version | head -n 1)
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# Compile flags
|
# Compile flags
|
||||||
#
|
#
|
||||||
@ -173,20 +184,33 @@ ifdef LLAMA_DISABLE_LOGS
|
|||||||
endif # LLAMA_DISABLE_LOGS
|
endif # LLAMA_DISABLE_LOGS
|
||||||
|
|
||||||
# warnings
|
# warnings
|
||||||
MK_CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith \
|
WARN_FLAGS = -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
|
||||||
-Wmissing-prototypes -Werror=implicit-int -Wno-unused-function
|
MK_CFLAGS += $(WARN_FLAGS) -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int \
|
||||||
MK_CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wmissing-declarations -Wno-unused-function -Wno-multichar
|
-Werror=implicit-function-declaration
|
||||||
|
MK_CXXFLAGS += $(WARN_FLAGS) -Wmissing-declarations -Wmissing-noreturn
|
||||||
|
|
||||||
# TODO(cebtenzzre): remove this once PR #2632 gets merged
|
ifeq ($(CC_IS_CLANG), 1)
|
||||||
TTFS_CXXFLAGS = $(CXXFLAGS) -Wno-missing-declarations
|
# clang options
|
||||||
|
MK_CFLAGS += -Wunreachable-code-break -Wunreachable-code-return
|
||||||
|
MK_HOST_CXXFLAGS += -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi
|
||||||
|
|
||||||
ifneq '' '$(findstring clang,$(shell $(CXX) --version))'
|
ifneq '' '$(and $(CC_IS_LLVM_CLANG),$(filter 1,$(shell expr $(CC_VER) \>= 030800)))'
|
||||||
# clang++ only
|
MK_CFLAGS += -Wdouble-promotion
|
||||||
MK_CXXFLAGS += -Wmissing-prototypes
|
endif
|
||||||
TTFS_CXXFLAGS += -Wno-missing-prototypes
|
ifneq '' '$(and $(CC_IS_APPLE_CLANG),$(filter 1,$(shell expr $(CC_VER) \>= 070300)))'
|
||||||
|
MK_CFLAGS += -Wdouble-promotion
|
||||||
|
endif
|
||||||
else
|
else
|
||||||
# g++ only
|
# gcc options
|
||||||
MK_CXXFLAGS += -Wno-format-truncation -Wno-array-bounds
|
MK_CFLAGS += -Wdouble-promotion
|
||||||
|
MK_HOST_CXXFLAGS += -Wno-array-bounds
|
||||||
|
|
||||||
|
ifeq ($(shell expr $(CC_VER) \>= 070100), 1)
|
||||||
|
MK_HOST_CXXFLAGS += -Wno-format-truncation
|
||||||
|
endif
|
||||||
|
ifeq ($(shell expr $(CC_VER) \>= 080100), 1)
|
||||||
|
MK_HOST_CXXFLAGS += -Wextra-semi
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# OS specific
|
# OS specific
|
||||||
@ -382,7 +406,7 @@ ifdef LLAMA_CUDA_CCBIN
|
|||||||
NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
|
NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
|
||||||
endif
|
endif
|
||||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
|
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
|
||||||
$(NVCC) $(NVCCFLAGS) -Wno-pedantic -c $< -o $@
|
$(NVCC) $(NVCCFLAGS) -c $< -o $@
|
||||||
endif # LLAMA_CUBLAS
|
endif # LLAMA_CUBLAS
|
||||||
|
|
||||||
ifdef LLAMA_CLBLAST
|
ifdef LLAMA_CLBLAST
|
||||||
@ -472,8 +496,8 @@ $(info I CFLAGS: $(CFLAGS))
|
|||||||
$(info I CXXFLAGS: $(CXXFLAGS))
|
$(info I CXXFLAGS: $(CXXFLAGS))
|
||||||
$(info I NVCCFLAGS: $(NVCCFLAGS))
|
$(info I NVCCFLAGS: $(NVCCFLAGS))
|
||||||
$(info I LDFLAGS: $(LDFLAGS))
|
$(info I LDFLAGS: $(LDFLAGS))
|
||||||
$(info I CC: $(CCV))
|
$(info I CC: $(shell $(CC) --version | head -n 1))
|
||||||
$(info I CXX: $(CXXV))
|
$(info I CXX: $(shell $(CXX) --version | head -n 1))
|
||||||
$(info )
|
$(info )
|
||||||
|
|
||||||
#
|
#
|
||||||
@ -554,7 +578,7 @@ gguf: examples/gguf/gguf.cpp ggml.o llama.o $(OBJS)
|
|||||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o common.o train.o $(OBJS)
|
train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o common.o train.o $(OBJS)
|
||||||
$(CXX) $(TTFS_CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
|
convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
|
||||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||||
@ -601,11 +625,18 @@ tests: $(TEST_TARGETS)
|
|||||||
|
|
||||||
benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o $(OBJS)
|
benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o $(OBJS)
|
||||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
|
run-benchmark-matmult: benchmark-matmult
|
||||||
./$@
|
./$@
|
||||||
|
|
||||||
|
.PHONY: run-benchmark-matmult
|
||||||
|
|
||||||
vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
|
vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
|
||||||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
|
q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
|
||||||
|
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
tests/test-llama-grammar: tests/test-llama-grammar.cpp build-info.h ggml.o common.o grammar-parser.o $(OBJS)
|
tests/test-llama-grammar: tests/test-llama-grammar.cpp build-info.h ggml.o common.o grammar-parser.o $(OBJS)
|
||||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
|
@ -755,10 +755,9 @@ std::string gpt_random_prompt(std::mt19937 & rng) {
|
|||||||
case 7: return "He";
|
case 7: return "He";
|
||||||
case 8: return "She";
|
case 8: return "She";
|
||||||
case 9: return "They";
|
case 9: return "They";
|
||||||
default: return "To";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return "The";
|
GGML_UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
|
74
common/log.h
74
common/log.h
@ -225,31 +225,31 @@ enum LogTriState
|
|||||||
// USE LOG() INSTEAD
|
// USE LOG() INSTEAD
|
||||||
//
|
//
|
||||||
#ifndef _MSC_VER
|
#ifndef _MSC_VER
|
||||||
#define LOG_IMPL(str, ...) \
|
#define LOG_IMPL(str, ...) \
|
||||||
{ \
|
do { \
|
||||||
if (LOG_TARGET != nullptr) \
|
if (LOG_TARGET != nullptr) \
|
||||||
{ \
|
{ \
|
||||||
fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL, __VA_ARGS__); \
|
fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL, __VA_ARGS__); \
|
||||||
fflush(LOG_TARGET); \
|
fflush(LOG_TARGET); \
|
||||||
} \
|
} \
|
||||||
}
|
} while (0)
|
||||||
#else
|
#else
|
||||||
#define LOG_IMPL(str, ...) \
|
#define LOG_IMPL(str, ...) \
|
||||||
{ \
|
do { \
|
||||||
if (LOG_TARGET != nullptr) \
|
if (LOG_TARGET != nullptr) \
|
||||||
{ \
|
{ \
|
||||||
fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL "", ##__VA_ARGS__); \
|
fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL "", ##__VA_ARGS__); \
|
||||||
fflush(LOG_TARGET); \
|
fflush(LOG_TARGET); \
|
||||||
} \
|
} \
|
||||||
}
|
} while (0)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// INTERNAL, DO NOT USE
|
// INTERNAL, DO NOT USE
|
||||||
// USE LOG_TEE() INSTEAD
|
// USE LOG_TEE() INSTEAD
|
||||||
//
|
//
|
||||||
#ifndef _MSC_VER
|
#ifndef _MSC_VER
|
||||||
#define LOG_TEE_IMPL(str, ...) \
|
#define LOG_TEE_IMPL(str, ...) \
|
||||||
{ \
|
do { \
|
||||||
if (LOG_TARGET != nullptr) \
|
if (LOG_TARGET != nullptr) \
|
||||||
{ \
|
{ \
|
||||||
fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL, __VA_ARGS__); \
|
fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL, __VA_ARGS__); \
|
||||||
@ -260,10 +260,10 @@ enum LogTriState
|
|||||||
fprintf(LOG_TEE_TARGET, LOG_TEE_TIMESTAMP_FMT LOG_TEE_FLF_FMT str "%s" LOG_TEE_TIMESTAMP_VAL LOG_TEE_FLF_VAL, __VA_ARGS__); \
|
fprintf(LOG_TEE_TARGET, LOG_TEE_TIMESTAMP_FMT LOG_TEE_FLF_FMT str "%s" LOG_TEE_TIMESTAMP_VAL LOG_TEE_FLF_VAL, __VA_ARGS__); \
|
||||||
fflush(LOG_TEE_TARGET); \
|
fflush(LOG_TEE_TARGET); \
|
||||||
} \
|
} \
|
||||||
}
|
} while (0)
|
||||||
#else
|
#else
|
||||||
#define LOG_TEE_IMPL(str, ...) \
|
#define LOG_TEE_IMPL(str, ...) \
|
||||||
{ \
|
do { \
|
||||||
if (LOG_TARGET != nullptr) \
|
if (LOG_TARGET != nullptr) \
|
||||||
{ \
|
{ \
|
||||||
fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL "", ##__VA_ARGS__); \
|
fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL "", ##__VA_ARGS__); \
|
||||||
@ -274,7 +274,7 @@ enum LogTriState
|
|||||||
fprintf(LOG_TEE_TARGET, LOG_TEE_TIMESTAMP_FMT LOG_TEE_FLF_FMT str "%s" LOG_TEE_TIMESTAMP_VAL LOG_TEE_FLF_VAL "", ##__VA_ARGS__); \
|
fprintf(LOG_TEE_TARGET, LOG_TEE_TIMESTAMP_FMT LOG_TEE_FLF_FMT str "%s" LOG_TEE_TIMESTAMP_VAL LOG_TEE_FLF_VAL "", ##__VA_ARGS__); \
|
||||||
fflush(LOG_TEE_TARGET); \
|
fflush(LOG_TEE_TARGET); \
|
||||||
} \
|
} \
|
||||||
}
|
} while (0)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The '\0' as a last argument, is a trick to bypass the silly
|
// The '\0' as a last argument, is a trick to bypass the silly
|
||||||
@ -435,41 +435,41 @@ inline FILE *log_handler() { return log_handler1_impl(); }
|
|||||||
inline void log_test()
|
inline void log_test()
|
||||||
{
|
{
|
||||||
log_disable();
|
log_disable();
|
||||||
LOG("01 Hello World to nobody, because logs are disabled!\n")
|
LOG("01 Hello World to nobody, because logs are disabled!\n");
|
||||||
log_enable();
|
log_enable();
|
||||||
LOG("02 Hello World to default output, which is \"%s\" ( Yaaay, arguments! )!\n", LOG_STRINGIZE(LOG_TARGET))
|
LOG("02 Hello World to default output, which is \"%s\" ( Yaaay, arguments! )!\n", LOG_STRINGIZE(LOG_TARGET));
|
||||||
LOG_TEE("03 Hello World to **both** default output and " LOG_TEE_TARGET_STRING "!\n")
|
LOG_TEE("03 Hello World to **both** default output and " LOG_TEE_TARGET_STRING "!\n");
|
||||||
log_set_target(stderr);
|
log_set_target(stderr);
|
||||||
LOG("04 Hello World to stderr!\n")
|
LOG("04 Hello World to stderr!\n");
|
||||||
LOG_TEE("05 Hello World TEE with double printing to stderr prevented!\n")
|
LOG_TEE("05 Hello World TEE with double printing to stderr prevented!\n");
|
||||||
log_set_target(LOG_DEFAULT_FILE_NAME);
|
log_set_target(LOG_DEFAULT_FILE_NAME);
|
||||||
LOG("06 Hello World to default log file!\n")
|
LOG("06 Hello World to default log file!\n");
|
||||||
log_set_target(stdout);
|
log_set_target(stdout);
|
||||||
LOG("07 Hello World to stdout!\n")
|
LOG("07 Hello World to stdout!\n");
|
||||||
log_set_target(LOG_DEFAULT_FILE_NAME);
|
log_set_target(LOG_DEFAULT_FILE_NAME);
|
||||||
LOG("08 Hello World to default log file again!\n")
|
LOG("08 Hello World to default log file again!\n");
|
||||||
log_disable();
|
log_disable();
|
||||||
LOG("09 Hello World _1_ into the void!\n")
|
LOG("09 Hello World _1_ into the void!\n");
|
||||||
log_enable();
|
log_enable();
|
||||||
LOG("10 Hello World back from the void ( you should not see _1_ in the log or the output )!\n")
|
LOG("10 Hello World back from the void ( you should not see _1_ in the log or the output )!\n");
|
||||||
log_disable();
|
log_disable();
|
||||||
log_set_target("llama.anotherlog.log");
|
log_set_target("llama.anotherlog.log");
|
||||||
LOG("11 Hello World _2_ to nobody, new target was selected but logs are still disabled!\n")
|
LOG("11 Hello World _2_ to nobody, new target was selected but logs are still disabled!\n");
|
||||||
log_enable();
|
log_enable();
|
||||||
LOG("12 Hello World this time in a new file ( you should not see _2_ in the log or the output )?\n")
|
LOG("12 Hello World this time in a new file ( you should not see _2_ in the log or the output )?\n");
|
||||||
log_set_target("llama.yetanotherlog.log");
|
log_set_target("llama.yetanotherlog.log");
|
||||||
LOG("13 Hello World this time in yet new file?\n")
|
LOG("13 Hello World this time in yet new file?\n");
|
||||||
log_set_target(log_filename_generator("llama_autonamed", "log"));
|
log_set_target(log_filename_generator("llama_autonamed", "log"));
|
||||||
LOG("14 Hello World in log with generated filename!\n")
|
LOG("14 Hello World in log with generated filename!\n");
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
LOG_TEE("15 Hello msvc TEE without arguments\n")
|
LOG_TEE("15 Hello msvc TEE without arguments\n");
|
||||||
LOG_TEE("16 Hello msvc TEE with (%d)(%s) arguments\n", 1, "test")
|
LOG_TEE("16 Hello msvc TEE with (%d)(%s) arguments\n", 1, "test");
|
||||||
LOG_TEELN("17 Hello msvc TEELN without arguments\n")
|
LOG_TEELN("17 Hello msvc TEELN without arguments\n");
|
||||||
LOG_TEELN("18 Hello msvc TEELN with (%d)(%s) arguments\n", 1, "test")
|
LOG_TEELN("18 Hello msvc TEELN with (%d)(%s) arguments\n", 1, "test");
|
||||||
LOG("19 Hello msvc LOG without arguments\n")
|
LOG("19 Hello msvc LOG without arguments\n");
|
||||||
LOG("20 Hello msvc LOG with (%d)(%s) arguments\n", 1, "test")
|
LOG("20 Hello msvc LOG with (%d)(%s) arguments\n", 1, "test");
|
||||||
LOGLN("21 Hello msvc LOGLN without arguments\n")
|
LOGLN("21 Hello msvc LOGLN without arguments\n");
|
||||||
LOGLN("22 Hello msvc LOGLN with (%d)(%s) arguments\n", 1, "test")
|
LOGLN("22 Hello msvc LOGLN with (%d)(%s) arguments\n", 1, "test");
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -542,7 +542,7 @@ inline void log_dump_cmdline_impl(int argc, char **argv)
|
|||||||
buf << " " << argv[i];
|
buf << " " << argv[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
LOGLN("Cmd:%s", buf.str().c_str())
|
LOGLN("Cmd:%s", buf.str().c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
#define log_tostr(var) log_var_to_string_impl(var).c_str()
|
#define log_tostr(var) log_var_to_string_impl(var).c_str()
|
||||||
@ -620,10 +620,10 @@ inline std::string log_var_to_string_impl(const std::vector<int> & var)
|
|||||||
#define LOGLN(...) // dummy stub
|
#define LOGLN(...) // dummy stub
|
||||||
|
|
||||||
#undef LOG_TEE
|
#undef LOG_TEE
|
||||||
#define LOG_TEE(...) fprintf(stderr, __VA_ARGS__); // convert to normal fprintf
|
#define LOG_TEE(...) fprintf(stderr, __VA_ARGS__) // convert to normal fprintf
|
||||||
|
|
||||||
#undef LOG_TEELN
|
#undef LOG_TEELN
|
||||||
#define LOG_TEELN(...) fprintf(stderr, __VA_ARGS__); // convert to normal fprintf
|
#define LOG_TEELN(...) fprintf(stderr, __VA_ARGS__) // convert to normal fprintf
|
||||||
|
|
||||||
#undef LOG_DISABLE
|
#undef LOG_DISABLE
|
||||||
#define LOG_DISABLE() // dummy stub
|
#define LOG_DISABLE() // dummy stub
|
||||||
|
@ -1,9 +1,12 @@
|
|||||||
#include "ggml.h"
|
#include "ggml.h"
|
||||||
#include "train.h"
|
#include "train.h"
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <random>
|
#include <cstdlib>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include <random>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
#pragma warning(disable: 4244 4267) // possible loss of data
|
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||||
@ -64,7 +67,7 @@ static struct ggml_tensor * randomize_tensor(
|
|||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(false);
|
assert(false);
|
||||||
};
|
}
|
||||||
|
|
||||||
return tensor;
|
return tensor;
|
||||||
}
|
}
|
||||||
@ -389,7 +392,7 @@ static void randomize_model_lora(
|
|||||||
free_random_normal_distribution(rnd);
|
free_random_normal_distribution(rnd);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool init_kv_cache(struct llama_kv_cache* cache, struct llama_model * model, int n_batch) {
|
static void init_kv_cache(struct llama_kv_cache* cache, struct llama_model * model, int n_batch) {
|
||||||
const auto & hparams = model->hparams;
|
const auto & hparams = model->hparams;
|
||||||
|
|
||||||
const uint32_t n_ctx = hparams.n_ctx;
|
const uint32_t n_ctx = hparams.n_ctx;
|
||||||
@ -415,14 +418,12 @@ static bool init_kv_cache(struct llama_kv_cache* cache, struct llama_model * mod
|
|||||||
|
|
||||||
if (!cache->ctx) {
|
if (!cache->ctx) {
|
||||||
fprintf(stderr, "%s: failed to allocate memory for kv cache\n", __func__);
|
fprintf(stderr, "%s: failed to allocate memory for kv cache\n", __func__);
|
||||||
return false;
|
exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cache->k = ggml_new_tensor_1d(cache->ctx, GGML_TYPE_F32, n_elements);
|
cache->k = ggml_new_tensor_1d(cache->ctx, GGML_TYPE_F32, n_elements);
|
||||||
cache->v = ggml_new_tensor_1d(cache->ctx, GGML_TYPE_F32, n_elements);
|
cache->v = ggml_new_tensor_1d(cache->ctx, GGML_TYPE_F32, n_elements);
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool init_kv_cache_lora(struct llama_kv_cache* cache, struct llama_model_lora * model, int n_batch) {
|
static bool init_kv_cache_lora(struct llama_kv_cache* cache, struct llama_model_lora * model, int n_batch) {
|
||||||
|
@ -655,9 +655,9 @@ struct printer {
|
|||||||
virtual ~printer() {}
|
virtual ~printer() {}
|
||||||
|
|
||||||
FILE * fout;
|
FILE * fout;
|
||||||
virtual void print_header(const cmd_params & params) { (void) params; };
|
virtual void print_header(const cmd_params & params) { (void) params; }
|
||||||
virtual void print_test(const test & t) = 0;
|
virtual void print_test(const test & t) = 0;
|
||||||
virtual void print_footer() { };
|
virtual void print_footer() { }
|
||||||
};
|
};
|
||||||
|
|
||||||
struct csv_printer : public printer {
|
struct csv_printer : public printer {
|
||||||
|
@ -852,7 +852,7 @@ int main(int argc, char ** argv) {
|
|||||||
llama_backend_free();
|
llama_backend_free();
|
||||||
|
|
||||||
#ifndef LOG_DISABLE_LOGS
|
#ifndef LOG_DISABLE_LOGS
|
||||||
LOG_TEE("Log end\n")
|
LOG_TEE("Log end\n");
|
||||||
#endif // LOG_DISABLE_LOGS
|
#endif // LOG_DISABLE_LOGS
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -72,6 +72,7 @@ static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftyp
|
|||||||
// usage:
|
// usage:
|
||||||
// ./quantize [--allow-requantize] [--leave-output-tensor] models/llama/ggml-model.gguf [models/llama/ggml-model-quant.gguf] type [nthreads]
|
// ./quantize [--allow-requantize] [--leave-output-tensor] models/llama/ggml-model.gguf [models/llama/ggml-model-quant.gguf] type [nthreads]
|
||||||
//
|
//
|
||||||
|
[[noreturn]]
|
||||||
static void usage(const char * executable) {
|
static void usage(const char * executable) {
|
||||||
printf("usage: %s [--help] [--allow-requantize] [--leave-output-tensor] model-f32.gguf [model-quant.gguf] type [nthreads]\n\n", executable);
|
printf("usage: %s [--help] [--allow-requantize] [--leave-output-tensor] model-f32.gguf [model-quant.gguf] type [nthreads]\n\n", executable);
|
||||||
printf(" --allow-requantize: Allows requantizing tensors that have already been quantized. Warning: This can severely reduce quality compared to quantizing from 16bit or 32bit\n");
|
printf(" --allow-requantize: Allows requantizing tensors that have already been quantized. Warning: This can severely reduce quality compared to quantizing from 16bit or 32bit\n");
|
||||||
|
@ -483,7 +483,7 @@ static struct ggml_tensor * llama_build_train_graphs(
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define GGUF_GET_KEY(ctx, dst, func, type, req, key) \
|
#define GGUF_GET_KEY(ctx, dst, func, type, req, key) \
|
||||||
{ \
|
do { \
|
||||||
const std::string skey(key); \
|
const std::string skey(key); \
|
||||||
const int kid = gguf_find_key(ctx, skey.c_str()); \
|
const int kid = gguf_find_key(ctx, skey.c_str()); \
|
||||||
if (kid >= 0) { \
|
if (kid >= 0) { \
|
||||||
@ -495,7 +495,7 @@ static struct ggml_tensor * llama_build_train_graphs(
|
|||||||
} else if (req) { \
|
} else if (req) { \
|
||||||
die_fmt("key not found in model: %s", skey.c_str()); \
|
die_fmt("key not found in model: %s", skey.c_str()); \
|
||||||
} \
|
} \
|
||||||
}
|
} while (0)
|
||||||
|
|
||||||
static void load_llama_model_gguf(struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct my_llama_model * model) {
|
static void load_llama_model_gguf(struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct my_llama_model * model) {
|
||||||
// NOTE: gguf_context must be initialized with f_ggml_ctx and no_alloc=false, otherwise tensor data can not be read
|
// NOTE: gguf_context must be initialized with f_ggml_ctx and no_alloc=false, otherwise tensor data can not be read
|
||||||
@ -786,7 +786,7 @@ struct train_params {
|
|||||||
float rope_freq_scale;
|
float rope_freq_scale;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct train_params get_default_train_params() {
|
static struct train_params get_default_train_params() {
|
||||||
struct train_params params;
|
struct train_params params;
|
||||||
params.common = get_default_train_params_common();
|
params.common = get_default_train_params_common();
|
||||||
params.fn_vocab_model = "ggml-vic7b-uncensored-q4_0.bin";
|
params.fn_vocab_model = "ggml-vic7b-uncensored-q4_0.bin";
|
||||||
|
288
ggml.c
288
ggml.c
@ -245,18 +245,18 @@ inline static void * ggml_aligned_malloc(size_t size) {
|
|||||||
//
|
//
|
||||||
|
|
||||||
#define GGML_TENSOR_UNARY_OP_LOCALS \
|
#define GGML_TENSOR_UNARY_OP_LOCALS \
|
||||||
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne); \
|
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
|
||||||
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb); \
|
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
|
||||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne); \
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
|
||||||
GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
|
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
||||||
|
|
||||||
#define GGML_TENSOR_BINARY_OP_LOCALS \
|
#define GGML_TENSOR_BINARY_OP_LOCALS \
|
||||||
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne); \
|
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
|
||||||
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb); \
|
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
|
||||||
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne); \
|
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
|
||||||
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb); \
|
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) \
|
||||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne); \
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
|
||||||
GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
|
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
||||||
|
|
||||||
#if defined(GGML_USE_ACCELERATE)
|
#if defined(GGML_USE_ACCELERATE)
|
||||||
#include <Accelerate/Accelerate.h>
|
#include <Accelerate/Accelerate.h>
|
||||||
@ -1866,7 +1866,7 @@ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
|
|||||||
#define GGML_F16x8_ADD vaddq_f16
|
#define GGML_F16x8_ADD vaddq_f16
|
||||||
#define GGML_F16x8_MUL vmulq_f16
|
#define GGML_F16x8_MUL vmulq_f16
|
||||||
#define GGML_F16x8_REDUCE(res, x) \
|
#define GGML_F16x8_REDUCE(res, x) \
|
||||||
{ \
|
do { \
|
||||||
int offset = GGML_F16_ARR >> 1; \
|
int offset = GGML_F16_ARR >> 1; \
|
||||||
for (int i = 0; i < offset; ++i) { \
|
for (int i = 0; i < offset; ++i) { \
|
||||||
x[i] = vaddq_f16(x[i], x[offset+i]); \
|
x[i] = vaddq_f16(x[i], x[offset+i]); \
|
||||||
@ -1882,7 +1882,7 @@ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
|
|||||||
const float32x4_t t0 = vcvt_f32_f16(vget_low_f16 (x[0])); \
|
const float32x4_t t0 = vcvt_f32_f16(vget_low_f16 (x[0])); \
|
||||||
const float32x4_t t1 = vcvt_f32_f16(vget_high_f16(x[0])); \
|
const float32x4_t t1 = vcvt_f32_f16(vget_high_f16(x[0])); \
|
||||||
res = (ggml_float) vaddvq_f32(vaddq_f32(t0, t1)); \
|
res = (ggml_float) vaddvq_f32(vaddq_f32(t0, t1)); \
|
||||||
}
|
} while (0)
|
||||||
|
|
||||||
#define GGML_F16_VEC GGML_F16x8
|
#define GGML_F16_VEC GGML_F16x8
|
||||||
#define GGML_F16_VEC_ZERO GGML_F16x8_ZERO
|
#define GGML_F16_VEC_ZERO GGML_F16x8_ZERO
|
||||||
@ -1943,7 +1943,7 @@ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
|
|||||||
#define GGML_F32x8_ADD _mm256_add_ps
|
#define GGML_F32x8_ADD _mm256_add_ps
|
||||||
#define GGML_F32x8_MUL _mm256_mul_ps
|
#define GGML_F32x8_MUL _mm256_mul_ps
|
||||||
#define GGML_F32x8_REDUCE(res, x) \
|
#define GGML_F32x8_REDUCE(res, x) \
|
||||||
{ \
|
do { \
|
||||||
int offset = GGML_F32_ARR >> 1; \
|
int offset = GGML_F32_ARR >> 1; \
|
||||||
for (int i = 0; i < offset; ++i) { \
|
for (int i = 0; i < offset; ++i) { \
|
||||||
x[i] = _mm256_add_ps(x[i], x[offset+i]); \
|
x[i] = _mm256_add_ps(x[i], x[offset+i]); \
|
||||||
@ -1960,7 +1960,7 @@ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
|
|||||||
_mm256_extractf128_ps(x[0], 1)); \
|
_mm256_extractf128_ps(x[0], 1)); \
|
||||||
const __m128 t1 = _mm_hadd_ps(t0, t0); \
|
const __m128 t1 = _mm_hadd_ps(t0, t0); \
|
||||||
res = _mm_cvtss_f32(_mm_hadd_ps(t1, t1)); \
|
res = _mm_cvtss_f32(_mm_hadd_ps(t1, t1)); \
|
||||||
}
|
} while (0)
|
||||||
// TODO: is this optimal ?
|
// TODO: is this optimal ?
|
||||||
|
|
||||||
#define GGML_F32_VEC GGML_F32x8
|
#define GGML_F32_VEC GGML_F32x8
|
||||||
@ -5154,31 +5154,31 @@ int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i) {
|
|||||||
{
|
{
|
||||||
GGML_ASSERT(tensor->nb[0] == sizeof(int8_t));
|
GGML_ASSERT(tensor->nb[0] == sizeof(int8_t));
|
||||||
return ((int8_t *)(tensor->data))[i];
|
return ((int8_t *)(tensor->data))[i];
|
||||||
} break;
|
}
|
||||||
case GGML_TYPE_I16:
|
case GGML_TYPE_I16:
|
||||||
{
|
{
|
||||||
GGML_ASSERT(tensor->nb[0] == sizeof(int16_t));
|
GGML_ASSERT(tensor->nb[0] == sizeof(int16_t));
|
||||||
return ((int16_t *)(tensor->data))[i];
|
return ((int16_t *)(tensor->data))[i];
|
||||||
} break;
|
}
|
||||||
case GGML_TYPE_I32:
|
case GGML_TYPE_I32:
|
||||||
{
|
{
|
||||||
GGML_ASSERT(tensor->nb[0] == sizeof(int32_t));
|
GGML_ASSERT(tensor->nb[0] == sizeof(int32_t));
|
||||||
return ((int32_t *)(tensor->data))[i];
|
return ((int32_t *)(tensor->data))[i];
|
||||||
} break;
|
}
|
||||||
case GGML_TYPE_F16:
|
case GGML_TYPE_F16:
|
||||||
{
|
{
|
||||||
GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
|
GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
|
||||||
return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
|
return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
|
||||||
} break;
|
}
|
||||||
case GGML_TYPE_F32:
|
case GGML_TYPE_F32:
|
||||||
{
|
{
|
||||||
GGML_ASSERT(tensor->nb[0] == sizeof(float));
|
GGML_ASSERT(tensor->nb[0] == sizeof(float));
|
||||||
return ((float *)(tensor->data))[i];
|
return ((float *)(tensor->data))[i];
|
||||||
} break;
|
}
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
GGML_ASSERT(false);
|
GGML_ASSERT(false);
|
||||||
} break;
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0.0f;
|
return 0.0f;
|
||||||
@ -5228,29 +5228,17 @@ int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i
|
|||||||
void * data = (char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1] + i2*tensor->nb[2] + i3*tensor->nb[3];
|
void * data = (char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1] + i2*tensor->nb[2] + i3*tensor->nb[3];
|
||||||
switch (tensor->type) {
|
switch (tensor->type) {
|
||||||
case GGML_TYPE_I8:
|
case GGML_TYPE_I8:
|
||||||
{
|
return ((int8_t *) data)[0];
|
||||||
return ((int8_t *) data)[0];
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_I16:
|
case GGML_TYPE_I16:
|
||||||
{
|
return ((int16_t *) data)[0];
|
||||||
return ((int16_t *) data)[0];
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_I32:
|
case GGML_TYPE_I32:
|
||||||
{
|
return ((int32_t *) data)[0];
|
||||||
return ((int32_t *) data)[0];
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_F16:
|
case GGML_TYPE_F16:
|
||||||
{
|
return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
|
||||||
return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_F32:
|
case GGML_TYPE_F32:
|
||||||
{
|
return ((float *) data)[0];
|
||||||
return ((float *) data)[0];
|
|
||||||
} break;
|
|
||||||
default:
|
default:
|
||||||
{
|
GGML_ASSERT(false);
|
||||||
GGML_ASSERT(false);
|
|
||||||
} break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0.0f;
|
return 0.0f;
|
||||||
@ -5297,31 +5285,31 @@ float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i) {
|
|||||||
{
|
{
|
||||||
GGML_ASSERT(tensor->nb[0] == sizeof(int8_t));
|
GGML_ASSERT(tensor->nb[0] == sizeof(int8_t));
|
||||||
return ((int8_t *)(tensor->data))[i];
|
return ((int8_t *)(tensor->data))[i];
|
||||||
} break;
|
}
|
||||||
case GGML_TYPE_I16:
|
case GGML_TYPE_I16:
|
||||||
{
|
{
|
||||||
GGML_ASSERT(tensor->nb[0] == sizeof(int16_t));
|
GGML_ASSERT(tensor->nb[0] == sizeof(int16_t));
|
||||||
return ((int16_t *)(tensor->data))[i];
|
return ((int16_t *)(tensor->data))[i];
|
||||||
} break;
|
}
|
||||||
case GGML_TYPE_I32:
|
case GGML_TYPE_I32:
|
||||||
{
|
{
|
||||||
GGML_ASSERT(tensor->nb[0] == sizeof(int32_t));
|
GGML_ASSERT(tensor->nb[0] == sizeof(int32_t));
|
||||||
return ((int32_t *)(tensor->data))[i];
|
return ((int32_t *)(tensor->data))[i];
|
||||||
} break;
|
}
|
||||||
case GGML_TYPE_F16:
|
case GGML_TYPE_F16:
|
||||||
{
|
{
|
||||||
GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
|
GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
|
||||||
return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
|
return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
|
||||||
} break;
|
}
|
||||||
case GGML_TYPE_F32:
|
case GGML_TYPE_F32:
|
||||||
{
|
{
|
||||||
GGML_ASSERT(tensor->nb[0] == sizeof(float));
|
GGML_ASSERT(tensor->nb[0] == sizeof(float));
|
||||||
return ((float *)(tensor->data))[i];
|
return ((float *)(tensor->data))[i];
|
||||||
} break;
|
}
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
GGML_ASSERT(false);
|
GGML_ASSERT(false);
|
||||||
} break;
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0.0f;
|
return 0.0f;
|
||||||
@ -5371,29 +5359,17 @@ float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
|
|||||||
void * data = (char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1] + i2*tensor->nb[2] + i3*tensor->nb[3];
|
void * data = (char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1] + i2*tensor->nb[2] + i3*tensor->nb[3];
|
||||||
switch (tensor->type) {
|
switch (tensor->type) {
|
||||||
case GGML_TYPE_I8:
|
case GGML_TYPE_I8:
|
||||||
{
|
return ((int8_t *) data)[0];
|
||||||
return ((int8_t *) data)[0];
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_I16:
|
case GGML_TYPE_I16:
|
||||||
{
|
return ((int16_t *) data)[0];
|
||||||
return ((int16_t *) data)[0];
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_I32:
|
case GGML_TYPE_I32:
|
||||||
{
|
return ((int32_t *) data)[0];
|
||||||
return ((int32_t *) data)[0];
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_F16:
|
case GGML_TYPE_F16:
|
||||||
{
|
return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
|
||||||
return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_F32:
|
case GGML_TYPE_F32:
|
||||||
{
|
return ((float *) data)[0];
|
||||||
return ((float *) data)[0];
|
|
||||||
} break;
|
|
||||||
default:
|
default:
|
||||||
{
|
GGML_ASSERT(false);
|
||||||
GGML_ASSERT(false);
|
|
||||||
} break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0.0f;
|
return 0.0f;
|
||||||
@ -8542,7 +8518,7 @@ static void ggml_compute_forward_dup_f16(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
GGML_TENSOR_UNARY_OP_LOCALS
|
||||||
|
|
||||||
const int ith = params->ith; // thread index
|
const int ith = params->ith; // thread index
|
||||||
const int nth = params->nth; // number of threads
|
const int nth = params->nth; // number of threads
|
||||||
@ -8813,7 +8789,7 @@ static void ggml_compute_forward_dup_f32(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
GGML_TENSOR_UNARY_OP_LOCALS
|
||||||
|
|
||||||
const int ith = params->ith; // thread index
|
const int ith = params->ith; // thread index
|
||||||
const int nth = params->nth; // number of threads
|
const int nth = params->nth; // number of threads
|
||||||
@ -9094,7 +9070,7 @@ static void ggml_compute_forward_add_f32(
|
|||||||
|
|
||||||
const int nr = ggml_nrows(src0);
|
const int nr = ggml_nrows(src0);
|
||||||
|
|
||||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
GGML_TENSOR_BINARY_OP_LOCALS
|
||||||
|
|
||||||
GGML_ASSERT( nb0 == sizeof(float));
|
GGML_ASSERT( nb0 == sizeof(float));
|
||||||
GGML_ASSERT(nb00 == sizeof(float));
|
GGML_ASSERT(nb00 == sizeof(float));
|
||||||
@ -9167,7 +9143,7 @@ static void ggml_compute_forward_add_f16_f32(
|
|||||||
|
|
||||||
const int nr = ggml_nrows(src0);
|
const int nr = ggml_nrows(src0);
|
||||||
|
|
||||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
GGML_TENSOR_BINARY_OP_LOCALS
|
||||||
|
|
||||||
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
||||||
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
||||||
@ -9221,7 +9197,7 @@ static void ggml_compute_forward_add_f16_f16(
|
|||||||
|
|
||||||
const int nr = ggml_nrows(src0);
|
const int nr = ggml_nrows(src0);
|
||||||
|
|
||||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
GGML_TENSOR_BINARY_OP_LOCALS
|
||||||
|
|
||||||
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
||||||
GGML_ASSERT(src1->type == GGML_TYPE_F16);
|
GGML_ASSERT(src1->type == GGML_TYPE_F16);
|
||||||
@ -9272,7 +9248,7 @@ static void ggml_compute_forward_add_q_f32(
|
|||||||
|
|
||||||
const int nr = ggml_nrows(src0);
|
const int nr = ggml_nrows(src0);
|
||||||
|
|
||||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
GGML_TENSOR_BINARY_OP_LOCALS
|
||||||
|
|
||||||
const int ith = params->ith;
|
const int ith = params->ith;
|
||||||
const int nth = params->nth;
|
const int nth = params->nth;
|
||||||
@ -9398,7 +9374,7 @@ static void ggml_compute_forward_add1_f32(
|
|||||||
|
|
||||||
const int nr = ggml_nrows(src0);
|
const int nr = ggml_nrows(src0);
|
||||||
|
|
||||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
GGML_TENSOR_UNARY_OP_LOCALS
|
||||||
|
|
||||||
GGML_ASSERT( nb0 == sizeof(float));
|
GGML_ASSERT( nb0 == sizeof(float));
|
||||||
GGML_ASSERT(nb00 == sizeof(float));
|
GGML_ASSERT(nb00 == sizeof(float));
|
||||||
@ -9453,7 +9429,7 @@ static void ggml_compute_forward_add1_f16_f32(
|
|||||||
|
|
||||||
const int nr = ggml_nrows(src0);
|
const int nr = ggml_nrows(src0);
|
||||||
|
|
||||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
GGML_TENSOR_UNARY_OP_LOCALS
|
||||||
|
|
||||||
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
||||||
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
||||||
@ -9503,7 +9479,7 @@ static void ggml_compute_forward_add1_f16_f16(
|
|||||||
|
|
||||||
const int nr = ggml_nrows(src0);
|
const int nr = ggml_nrows(src0);
|
||||||
|
|
||||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
GGML_TENSOR_UNARY_OP_LOCALS
|
||||||
|
|
||||||
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
||||||
GGML_ASSERT(src1->type == GGML_TYPE_F16);
|
GGML_ASSERT(src1->type == GGML_TYPE_F16);
|
||||||
@ -9553,7 +9529,7 @@ static void ggml_compute_forward_add1_q_f32(
|
|||||||
|
|
||||||
const int nr = ggml_nrows(src0);
|
const int nr = ggml_nrows(src0);
|
||||||
|
|
||||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
GGML_TENSOR_UNARY_OP_LOCALS
|
||||||
|
|
||||||
const enum ggml_type type = src0->type;
|
const enum ggml_type type = src0->type;
|
||||||
ggml_to_float_t const dequantize_row_q = type_traits[type].to_float;
|
ggml_to_float_t const dequantize_row_q = type_traits[type].to_float;
|
||||||
@ -9681,8 +9657,8 @@ static void ggml_compute_forward_acc_f32(
|
|||||||
const int nr = ggml_nrows(src1);
|
const int nr = ggml_nrows(src1);
|
||||||
const int nc = src1->ne[0];
|
const int nc = src1->ne[0];
|
||||||
|
|
||||||
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
|
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne)
|
||||||
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
|
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb)
|
||||||
|
|
||||||
// src0 and dst as viewed during acc
|
// src0 and dst as viewed during acc
|
||||||
const size_t nb0 = ggml_element_size(src0);
|
const size_t nb0 = ggml_element_size(src0);
|
||||||
@ -9771,7 +9747,7 @@ static void ggml_compute_forward_sub_f32(
|
|||||||
|
|
||||||
const int nr = ggml_nrows(src0);
|
const int nr = ggml_nrows(src0);
|
||||||
|
|
||||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
GGML_TENSOR_BINARY_OP_LOCALS
|
||||||
|
|
||||||
GGML_ASSERT( nb0 == sizeof(float));
|
GGML_ASSERT( nb0 == sizeof(float));
|
||||||
GGML_ASSERT(nb00 == sizeof(float));
|
GGML_ASSERT(nb00 == sizeof(float));
|
||||||
@ -9861,7 +9837,7 @@ static void ggml_compute_forward_mul_f32(
|
|||||||
|
|
||||||
const int64_t nr = ggml_nrows(src0);
|
const int64_t nr = ggml_nrows(src0);
|
||||||
|
|
||||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
GGML_TENSOR_BINARY_OP_LOCALS
|
||||||
|
|
||||||
GGML_ASSERT( nb0 == sizeof(float));
|
GGML_ASSERT( nb0 == sizeof(float));
|
||||||
GGML_ASSERT(nb00 == sizeof(float));
|
GGML_ASSERT(nb00 == sizeof(float));
|
||||||
@ -9952,7 +9928,7 @@ static void ggml_compute_forward_div_f32(
|
|||||||
|
|
||||||
const int nr = ggml_nrows(src0);
|
const int nr = ggml_nrows(src0);
|
||||||
|
|
||||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
GGML_TENSOR_BINARY_OP_LOCALS
|
||||||
|
|
||||||
GGML_ASSERT( nb0 == sizeof(float));
|
GGML_ASSERT( nb0 == sizeof(float));
|
||||||
GGML_ASSERT(nb00 == sizeof(float));
|
GGML_ASSERT(nb00 == sizeof(float));
|
||||||
@ -10161,8 +10137,8 @@ static void ggml_compute_forward_sum_f32(
|
|||||||
assert(ggml_is_scalar(dst));
|
assert(ggml_is_scalar(dst));
|
||||||
assert(src0->nb[0] == sizeof(float));
|
assert(src0->nb[0] == sizeof(float));
|
||||||
|
|
||||||
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
|
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
|
||||||
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb);
|
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb)
|
||||||
|
|
||||||
ggml_float sum = 0;
|
ggml_float sum = 0;
|
||||||
ggml_float row_sum = 0;
|
ggml_float row_sum = 0;
|
||||||
@ -10193,8 +10169,8 @@ static void ggml_compute_forward_sum_f16(
|
|||||||
|
|
||||||
assert(src0->nb[0] == sizeof(ggml_fp16_t));
|
assert(src0->nb[0] == sizeof(ggml_fp16_t));
|
||||||
|
|
||||||
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
|
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
|
||||||
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb);
|
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb)
|
||||||
|
|
||||||
float sum = 0;
|
float sum = 0;
|
||||||
float row_sum = 0;
|
float row_sum = 0;
|
||||||
@ -10247,7 +10223,7 @@ static void ggml_compute_forward_sum_rows_f32(
|
|||||||
GGML_ASSERT(src0->nb[0] == sizeof(float));
|
GGML_ASSERT(src0->nb[0] == sizeof(float));
|
||||||
GGML_ASSERT(dst->nb[0] == sizeof(float));
|
GGML_ASSERT(dst->nb[0] == sizeof(float));
|
||||||
|
|
||||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
GGML_TENSOR_UNARY_OP_LOCALS
|
||||||
|
|
||||||
GGML_ASSERT(ne0 == 1);
|
GGML_ASSERT(ne0 == 1);
|
||||||
GGML_ASSERT(ne1 == ne01);
|
GGML_ASSERT(ne1 == ne01);
|
||||||
@ -10297,7 +10273,7 @@ static void ggml_compute_forward_mean_f32(
|
|||||||
|
|
||||||
assert(src0->nb[0] == sizeof(float));
|
assert(src0->nb[0] == sizeof(float));
|
||||||
|
|
||||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
GGML_TENSOR_UNARY_OP_LOCALS
|
||||||
|
|
||||||
assert(ne0 == 1);
|
assert(ne0 == 1);
|
||||||
assert(ne1 == ne01);
|
assert(ne1 == ne01);
|
||||||
@ -10397,7 +10373,7 @@ static void ggml_compute_forward_repeat_f32(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
GGML_TENSOR_UNARY_OP_LOCALS
|
||||||
|
|
||||||
// guaranteed to be an integer due to the check in ggml_can_repeat
|
// guaranteed to be an integer due to the check in ggml_can_repeat
|
||||||
const int nr0 = (int)(ne0/ne00);
|
const int nr0 = (int)(ne0/ne00);
|
||||||
@ -10508,7 +10484,7 @@ static void ggml_compute_forward_repeat_back_f32(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
GGML_TENSOR_UNARY_OP_LOCALS
|
||||||
|
|
||||||
// guaranteed to be an integer due to the check in ggml_can_repeat
|
// guaranteed to be an integer due to the check in ggml_can_repeat
|
||||||
const int nr0 = (int)(ne00/ne0);
|
const int nr0 = (int)(ne00/ne0);
|
||||||
@ -10586,7 +10562,7 @@ static void ggml_compute_forward_concat_f32(
|
|||||||
|
|
||||||
const int ith = params->ith;
|
const int ith = params->ith;
|
||||||
|
|
||||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
GGML_TENSOR_BINARY_OP_LOCALS
|
||||||
|
|
||||||
// TODO: support for transposed / permuted tensors
|
// TODO: support for transposed / permuted tensors
|
||||||
GGML_ASSERT(nb0 == sizeof(float));
|
GGML_ASSERT(nb0 == sizeof(float));
|
||||||
@ -11188,7 +11164,7 @@ static void ggml_compute_forward_norm_f32(
|
|||||||
const int ith = params->ith;
|
const int ith = params->ith;
|
||||||
const int nth = params->nth;
|
const int nth = params->nth;
|
||||||
|
|
||||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
GGML_TENSOR_UNARY_OP_LOCALS
|
||||||
|
|
||||||
float eps;
|
float eps;
|
||||||
memcpy(&eps, dst->op_params, sizeof(float));
|
memcpy(&eps, dst->op_params, sizeof(float));
|
||||||
@ -11257,7 +11233,7 @@ static void ggml_compute_forward_rms_norm_f32(
|
|||||||
const int ith = params->ith;
|
const int ith = params->ith;
|
||||||
const int nth = params->nth;
|
const int nth = params->nth;
|
||||||
|
|
||||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
GGML_TENSOR_UNARY_OP_LOCALS
|
||||||
|
|
||||||
float eps;
|
float eps;
|
||||||
memcpy(&eps, dst->op_params, sizeof(float));
|
memcpy(&eps, dst->op_params, sizeof(float));
|
||||||
@ -11322,7 +11298,7 @@ static void ggml_compute_forward_rms_norm_back_f32(
|
|||||||
const int ith = params->ith;
|
const int ith = params->ith;
|
||||||
const int nth = params->nth;
|
const int nth = params->nth;
|
||||||
|
|
||||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
GGML_TENSOR_BINARY_OP_LOCALS
|
||||||
|
|
||||||
float eps;
|
float eps;
|
||||||
memcpy(&eps, dst->op_params, sizeof(float));
|
memcpy(&eps, dst->op_params, sizeof(float));
|
||||||
@ -11497,7 +11473,7 @@ static void ggml_compute_forward_group_norm_f32(
|
|||||||
const int ith = params->ith;
|
const int ith = params->ith;
|
||||||
const int nth = params->nth;
|
const int nth = params->nth;
|
||||||
|
|
||||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
GGML_TENSOR_UNARY_OP_LOCALS
|
||||||
|
|
||||||
const float eps = 1e-6f; // TODO: make this a parameter
|
const float eps = 1e-6f; // TODO: make this a parameter
|
||||||
|
|
||||||
@ -11608,7 +11584,7 @@ static void ggml_compute_forward_mul_mat(
|
|||||||
int64_t t0 = ggml_perf_time_us();
|
int64_t t0 = ggml_perf_time_us();
|
||||||
UNUSED(t0);
|
UNUSED(t0);
|
||||||
|
|
||||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
GGML_TENSOR_BINARY_OP_LOCALS
|
||||||
|
|
||||||
const int ith = params->ith;
|
const int ith = params->ith;
|
||||||
const int nth = params->nth;
|
const int nth = params->nth;
|
||||||
@ -11826,7 +11802,7 @@ static void ggml_compute_forward_out_prod_f32(
|
|||||||
// int64_t t0 = ggml_perf_time_us();
|
// int64_t t0 = ggml_perf_time_us();
|
||||||
// UNUSED(t0);
|
// UNUSED(t0);
|
||||||
|
|
||||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
GGML_TENSOR_BINARY_OP_LOCALS
|
||||||
|
|
||||||
const int ith = params->ith;
|
const int ith = params->ith;
|
||||||
const int nth = params->nth;
|
const int nth = params->nth;
|
||||||
@ -12200,8 +12176,8 @@ static void ggml_compute_forward_set_f32(
|
|||||||
const int nr = ggml_nrows(src1);
|
const int nr = ggml_nrows(src1);
|
||||||
const int nc = src1->ne[0];
|
const int nc = src1->ne[0];
|
||||||
|
|
||||||
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
|
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne)
|
||||||
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
|
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb)
|
||||||
|
|
||||||
// src0 and dst as viewed during set
|
// src0 and dst as viewed during set
|
||||||
const size_t nb0 = ggml_element_size(src0);
|
const size_t nb0 = ggml_element_size(src0);
|
||||||
@ -12588,7 +12564,7 @@ static void ggml_compute_forward_diag_f32(
|
|||||||
|
|
||||||
// TODO: handle transposed/permuted matrices
|
// TODO: handle transposed/permuted matrices
|
||||||
|
|
||||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
GGML_TENSOR_UNARY_OP_LOCALS
|
||||||
|
|
||||||
GGML_ASSERT(ne00 == ne0);
|
GGML_ASSERT(ne00 == ne0);
|
||||||
GGML_ASSERT(ne00 == ne1);
|
GGML_ASSERT(ne00 == ne1);
|
||||||
@ -13163,7 +13139,7 @@ static void ggml_compute_forward_rope_f32(
|
|||||||
memcpy(&xpos_base, (int32_t *) dst->op_params + 6, sizeof(float));
|
memcpy(&xpos_base, (int32_t *) dst->op_params + 6, sizeof(float));
|
||||||
memcpy(&xpos_down, (int32_t *) dst->op_params + 7, sizeof(bool));
|
memcpy(&xpos_down, (int32_t *) dst->op_params + 7, sizeof(bool));
|
||||||
|
|
||||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
GGML_TENSOR_UNARY_OP_LOCALS
|
||||||
|
|
||||||
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
|
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
|
||||||
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
|
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
|
||||||
@ -13295,7 +13271,7 @@ static void ggml_compute_forward_rope_f16(
|
|||||||
memcpy(&freq_base, (int32_t *) dst->op_params + 4, sizeof(float));
|
memcpy(&freq_base, (int32_t *) dst->op_params + 4, sizeof(float));
|
||||||
memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
|
memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
|
||||||
|
|
||||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
GGML_TENSOR_UNARY_OP_LOCALS
|
||||||
|
|
||||||
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
|
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
|
||||||
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
|
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
|
||||||
@ -13458,7 +13434,7 @@ static void ggml_compute_forward_rope_back_f32(
|
|||||||
memcpy(&xpos_base, (int32_t *) dst->op_params + 6, sizeof(float));
|
memcpy(&xpos_base, (int32_t *) dst->op_params + 6, sizeof(float));
|
||||||
memcpy(&xpos_down, (int32_t *) dst->op_params + 7, sizeof(bool));
|
memcpy(&xpos_down, (int32_t *) dst->op_params + 7, sizeof(bool));
|
||||||
|
|
||||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
GGML_TENSOR_UNARY_OP_LOCALS
|
||||||
|
|
||||||
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
|
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
|
||||||
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
|
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
|
||||||
@ -13558,7 +13534,7 @@ static void ggml_compute_forward_rope_back_f16(
|
|||||||
const int n_dims = ((int32_t *) dst->op_params)[1];
|
const int n_dims = ((int32_t *) dst->op_params)[1];
|
||||||
const int mode = ((int32_t *) dst->op_params)[2];
|
const int mode = ((int32_t *) dst->op_params)[2];
|
||||||
|
|
||||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
GGML_TENSOR_UNARY_OP_LOCALS
|
||||||
|
|
||||||
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
|
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
|
||||||
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
|
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
|
||||||
@ -13672,7 +13648,7 @@ static void ggml_compute_forward_conv_1d_s1_ph_f16_f32(
|
|||||||
int64_t t0 = ggml_perf_time_us();
|
int64_t t0 = ggml_perf_time_us();
|
||||||
UNUSED(t0);
|
UNUSED(t0);
|
||||||
|
|
||||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
GGML_TENSOR_BINARY_OP_LOCALS
|
||||||
|
|
||||||
const int ith = params->ith;
|
const int ith = params->ith;
|
||||||
const int nth = params->nth;
|
const int nth = params->nth;
|
||||||
@ -13763,7 +13739,7 @@ static void ggml_compute_forward_conv_1d_s1_ph_f32(
|
|||||||
int64_t t0 = ggml_perf_time_us();
|
int64_t t0 = ggml_perf_time_us();
|
||||||
UNUSED(t0);
|
UNUSED(t0);
|
||||||
|
|
||||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
GGML_TENSOR_BINARY_OP_LOCALS
|
||||||
|
|
||||||
const int ith = params->ith;
|
const int ith = params->ith;
|
||||||
const int nth = params->nth;
|
const int nth = params->nth;
|
||||||
@ -13875,7 +13851,7 @@ static void ggml_compute_forward_conv_1d_s2_ph_f16_f32(
|
|||||||
int64_t t0 = ggml_perf_time_us();
|
int64_t t0 = ggml_perf_time_us();
|
||||||
UNUSED(t0);
|
UNUSED(t0);
|
||||||
|
|
||||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
GGML_TENSOR_BINARY_OP_LOCALS
|
||||||
|
|
||||||
const int ith = params->ith;
|
const int ith = params->ith;
|
||||||
const int nth = params->nth;
|
const int nth = params->nth;
|
||||||
@ -13966,7 +13942,7 @@ static void ggml_compute_forward_conv_1d_s2_ph_f32(
|
|||||||
int64_t t0 = ggml_perf_time_us();
|
int64_t t0 = ggml_perf_time_us();
|
||||||
UNUSED(t0);
|
UNUSED(t0);
|
||||||
|
|
||||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
GGML_TENSOR_BINARY_OP_LOCALS
|
||||||
|
|
||||||
const int ith = params->ith;
|
const int ith = params->ith;
|
||||||
const int nth = params->nth;
|
const int nth = params->nth;
|
||||||
@ -14084,7 +14060,7 @@ static void ggml_compute_forward_conv_1d(
|
|||||||
ggml_compute_forward_conv_1d_s2_ph(params, src0, src1, dst);
|
ggml_compute_forward_conv_1d_s2_ph(params, src0, src1, dst);
|
||||||
} else {
|
} else {
|
||||||
GGML_ASSERT(false); // only stride 1 and 2 supported
|
GGML_ASSERT(false); // only stride 1 and 2 supported
|
||||||
};
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ggml_compute_forward_conv_2d
|
// ggml_compute_forward_conv_2d
|
||||||
@ -14101,7 +14077,7 @@ static void ggml_compute_forward_conv_2d_f16_f32(
|
|||||||
int64_t t0 = ggml_perf_time_us();
|
int64_t t0 = ggml_perf_time_us();
|
||||||
UNUSED(t0);
|
UNUSED(t0);
|
||||||
|
|
||||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
GGML_TENSOR_BINARY_OP_LOCALS
|
||||||
|
|
||||||
const int ith = params->ith;
|
const int ith = params->ith;
|
||||||
const int nth = params->nth;
|
const int nth = params->nth;
|
||||||
@ -14221,7 +14197,7 @@ static void ggml_compute_forward_conv_transpose_2d(
|
|||||||
int64_t t0 = ggml_perf_time_us();
|
int64_t t0 = ggml_perf_time_us();
|
||||||
UNUSED(t0);
|
UNUSED(t0);
|
||||||
|
|
||||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
GGML_TENSOR_BINARY_OP_LOCALS
|
||||||
|
|
||||||
const int ith = params->ith;
|
const int ith = params->ith;
|
||||||
const int nth = params->nth;
|
const int nth = params->nth;
|
||||||
@ -14480,7 +14456,7 @@ static void ggml_compute_forward_upscale_f32(
|
|||||||
|
|
||||||
const int ith = params->ith;
|
const int ith = params->ith;
|
||||||
|
|
||||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
GGML_TENSOR_UNARY_OP_LOCALS
|
||||||
|
|
||||||
const int scale_factor = dst->op_params[0];
|
const int scale_factor = dst->op_params[0];
|
||||||
|
|
||||||
@ -14532,14 +14508,14 @@ static void ggml_compute_forward_flash_attn_f32(
|
|||||||
int64_t t0 = ggml_perf_time_us();
|
int64_t t0 = ggml_perf_time_us();
|
||||||
UNUSED(t0);
|
UNUSED(t0);
|
||||||
|
|
||||||
GGML_TENSOR_LOCALS(int64_t, neq, q, ne);
|
GGML_TENSOR_LOCALS(int64_t, neq, q, ne)
|
||||||
GGML_TENSOR_LOCALS(size_t, nbq, q, nb);
|
GGML_TENSOR_LOCALS(size_t, nbq, q, nb)
|
||||||
GGML_TENSOR_LOCALS(int64_t, nek, k, ne);
|
GGML_TENSOR_LOCALS(int64_t, nek, k, ne)
|
||||||
GGML_TENSOR_LOCALS(size_t, nbk, k, nb);
|
GGML_TENSOR_LOCALS(size_t, nbk, k, nb)
|
||||||
GGML_TENSOR_LOCALS(int64_t, nev, v, ne);
|
GGML_TENSOR_LOCALS(int64_t, nev, v, ne)
|
||||||
GGML_TENSOR_LOCALS(size_t, nbv, v, nb);
|
GGML_TENSOR_LOCALS(size_t, nbv, v, nb)
|
||||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne)
|
||||||
GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
|
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
||||||
|
|
||||||
const int ith = params->ith;
|
const int ith = params->ith;
|
||||||
const int nth = params->nth;
|
const int nth = params->nth;
|
||||||
@ -14722,14 +14698,14 @@ static void ggml_compute_forward_flash_attn_f16(
|
|||||||
int64_t t0 = ggml_perf_time_us();
|
int64_t t0 = ggml_perf_time_us();
|
||||||
UNUSED(t0);
|
UNUSED(t0);
|
||||||
|
|
||||||
GGML_TENSOR_LOCALS(int64_t, neq, q, ne);
|
GGML_TENSOR_LOCALS(int64_t, neq, q, ne)
|
||||||
GGML_TENSOR_LOCALS(size_t, nbq, q, nb);
|
GGML_TENSOR_LOCALS(size_t, nbq, q, nb)
|
||||||
GGML_TENSOR_LOCALS(int64_t, nek, k, ne);
|
GGML_TENSOR_LOCALS(int64_t, nek, k, ne)
|
||||||
GGML_TENSOR_LOCALS(size_t, nbk, k, nb);
|
GGML_TENSOR_LOCALS(size_t, nbk, k, nb)
|
||||||
GGML_TENSOR_LOCALS(int64_t, nev, v, ne);
|
GGML_TENSOR_LOCALS(int64_t, nev, v, ne)
|
||||||
GGML_TENSOR_LOCALS(size_t, nbv, v, nb);
|
GGML_TENSOR_LOCALS(size_t, nbv, v, nb)
|
||||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne)
|
||||||
GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
|
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
||||||
|
|
||||||
const int ith = params->ith;
|
const int ith = params->ith;
|
||||||
const int nth = params->nth;
|
const int nth = params->nth;
|
||||||
@ -14974,18 +14950,18 @@ static void ggml_compute_forward_flash_ff_f16(
|
|||||||
int64_t t0 = ggml_perf_time_us();
|
int64_t t0 = ggml_perf_time_us();
|
||||||
UNUSED(t0);
|
UNUSED(t0);
|
||||||
|
|
||||||
GGML_TENSOR_LOCALS(int64_t, nea, a, ne);
|
GGML_TENSOR_LOCALS(int64_t, nea, a, ne)
|
||||||
GGML_TENSOR_LOCALS(size_t, nba, a, nb);
|
GGML_TENSOR_LOCALS(size_t, nba, a, nb)
|
||||||
GGML_TENSOR_LOCALS(int64_t, neb0, b0, ne);
|
GGML_TENSOR_LOCALS(int64_t, neb0, b0, ne)
|
||||||
GGML_TENSOR_LOCALS(size_t, nbb0, b0, nb);
|
GGML_TENSOR_LOCALS(size_t, nbb0, b0, nb)
|
||||||
GGML_TENSOR_LOCALS(int64_t, neb1, b1, ne);
|
GGML_TENSOR_LOCALS(int64_t, neb1, b1, ne)
|
||||||
GGML_TENSOR_LOCALS(size_t, nbb1, b1, nb);
|
GGML_TENSOR_LOCALS(size_t, nbb1, b1, nb)
|
||||||
GGML_TENSOR_LOCALS(int64_t, nec0, c0, ne);
|
GGML_TENSOR_LOCALS(int64_t, nec0, c0, ne)
|
||||||
GGML_TENSOR_LOCALS(size_t, nbc0, c0, nb);
|
GGML_TENSOR_LOCALS(size_t, nbc0, c0, nb)
|
||||||
GGML_TENSOR_LOCALS(int64_t, nec1, c1, ne);
|
GGML_TENSOR_LOCALS(int64_t, nec1, c1, ne)
|
||||||
GGML_TENSOR_LOCALS(size_t, nbc1, c1, nb);
|
GGML_TENSOR_LOCALS(size_t, nbc1, c1, nb)
|
||||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne)
|
||||||
GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
|
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
||||||
|
|
||||||
const int ith = params->ith;
|
const int ith = params->ith;
|
||||||
const int nth = params->nth;
|
const int nth = params->nth;
|
||||||
@ -15133,16 +15109,16 @@ static void ggml_compute_forward_flash_attn_back_f32(
|
|||||||
int64_t t0 = ggml_perf_time_us();
|
int64_t t0 = ggml_perf_time_us();
|
||||||
UNUSED(t0);
|
UNUSED(t0);
|
||||||
|
|
||||||
GGML_TENSOR_LOCALS(int64_t, neq, q, ne);
|
GGML_TENSOR_LOCALS(int64_t, neq, q, ne)
|
||||||
GGML_TENSOR_LOCALS(size_t, nbq, q, nb);
|
GGML_TENSOR_LOCALS(size_t, nbq, q, nb)
|
||||||
GGML_TENSOR_LOCALS(int64_t, nek, k, ne);
|
GGML_TENSOR_LOCALS(int64_t, nek, k, ne)
|
||||||
GGML_TENSOR_LOCALS(size_t, nbk, k, nb);
|
GGML_TENSOR_LOCALS(size_t, nbk, k, nb)
|
||||||
GGML_TENSOR_LOCALS(int64_t, nev, v, ne);
|
GGML_TENSOR_LOCALS(int64_t, nev, v, ne)
|
||||||
GGML_TENSOR_LOCALS(size_t, nbv, v, nb);
|
GGML_TENSOR_LOCALS(size_t, nbv, v, nb)
|
||||||
GGML_TENSOR_LOCALS(int64_t, ned, d, ne);
|
GGML_TENSOR_LOCALS(int64_t, ned, d, ne)
|
||||||
GGML_TENSOR_LOCALS(size_t, nbd, d, nb);
|
GGML_TENSOR_LOCALS(size_t, nbd, d, nb)
|
||||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne)
|
||||||
GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
|
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
||||||
|
|
||||||
const int ith = params->ith;
|
const int ith = params->ith;
|
||||||
const int nth = params->nth;
|
const int nth = params->nth;
|
||||||
@ -15505,8 +15481,8 @@ static void ggml_compute_forward_win_part_f32(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
|
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
|
||||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne)
|
||||||
|
|
||||||
const int32_t nep0 = ((const int32_t *)(dst->op_params))[0];
|
const int32_t nep0 = ((const int32_t *)(dst->op_params))[0];
|
||||||
const int32_t nep1 = ((const int32_t *)(dst->op_params))[1];
|
const int32_t nep1 = ((const int32_t *)(dst->op_params))[1];
|
||||||
@ -15567,8 +15543,8 @@ static void ggml_compute_forward_win_unpart_f32(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
|
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
|
||||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne)
|
||||||
|
|
||||||
const int32_t w = ((const int32_t *)(dst->op_params))[0];
|
const int32_t w = ((const int32_t *)(dst->op_params))[0];
|
||||||
|
|
||||||
@ -15685,7 +15661,7 @@ static void ggml_compute_forward_get_rel_pos_f16(
|
|||||||
|
|
||||||
// ref: https://github.com/facebookresearch/segment-anything/blob/main/segment_anything/modeling/image_encoder.py#L292-L322
|
// ref: https://github.com/facebookresearch/segment-anything/blob/main/segment_anything/modeling/image_encoder.py#L292-L322
|
||||||
|
|
||||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
GGML_TENSOR_UNARY_OP_LOCALS
|
||||||
|
|
||||||
const int64_t w = ne1;
|
const int64_t w = ne1;
|
||||||
|
|
||||||
@ -19637,7 +19613,7 @@ static enum ggml_opt_result linesearch_backtracking(
|
|||||||
(*step) *= width;
|
(*step) *= width;
|
||||||
}
|
}
|
||||||
|
|
||||||
return GGML_LINESEARCH_FAIL;
|
GGML_UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
static enum ggml_opt_result ggml_opt_lbfgs(
|
static enum ggml_opt_result ggml_opt_lbfgs(
|
||||||
@ -19904,7 +19880,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|||||||
step[0] = 1.0;
|
step[0] = 1.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return GGML_OPT_DID_NOT_CONVERGE;
|
GGML_UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
|
struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
|
||||||
@ -20638,10 +20614,10 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||||||
} break;
|
} break;
|
||||||
case GGUF_TYPE_ARRAY:
|
case GGUF_TYPE_ARRAY:
|
||||||
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
|
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
|
||||||
};
|
}
|
||||||
} break;
|
} break;
|
||||||
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
|
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
|
||||||
};
|
}
|
||||||
|
|
||||||
if (!ok) {
|
if (!ok) {
|
||||||
break;
|
break;
|
||||||
@ -21369,10 +21345,10 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
|
|||||||
} break;
|
} break;
|
||||||
case GGUF_TYPE_ARRAY:
|
case GGUF_TYPE_ARRAY:
|
||||||
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
|
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
|
||||||
};
|
}
|
||||||
} break;
|
} break;
|
||||||
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
|
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
|
||||||
};
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// write tensor infos
|
// write tensor infos
|
||||||
|
8
ggml.h
8
ggml.h
@ -248,6 +248,14 @@
|
|||||||
} \
|
} \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
#ifndef NDEBUG
|
||||||
|
#define GGML_UNREACHABLE() GGML_ASSERT(!"statement should not be reached")
|
||||||
|
#elif defined(__GNUC__)
|
||||||
|
#define GGML_UNREACHABLE() __builtin_unreachable()
|
||||||
|
#else
|
||||||
|
#define GGML_UNREACHABLE() ((void) 0)
|
||||||
|
#endif
|
||||||
|
|
||||||
// used to copy the number of elements and stride in bytes of tensors into local variables.
|
// used to copy the number of elements and stride in bytes of tensors into local variables.
|
||||||
// main purpose is to reduce code duplication and improve readability.
|
// main purpose is to reduce code duplication and improve readability.
|
||||||
//
|
//
|
||||||
|
14
llama.cpp
14
llama.cpp
@ -449,7 +449,7 @@ struct LLM_TN {
|
|||||||
//
|
//
|
||||||
|
|
||||||
#define GGUF_GET_KEY(ctx, dst, func, type, req, key) \
|
#define GGUF_GET_KEY(ctx, dst, func, type, req, key) \
|
||||||
{ \
|
do { \
|
||||||
const std::string skey(key); \
|
const std::string skey(key); \
|
||||||
const int kid = gguf_find_key(ctx, skey.c_str()); \
|
const int kid = gguf_find_key(ctx, skey.c_str()); \
|
||||||
if (kid >= 0) { \
|
if (kid >= 0) { \
|
||||||
@ -461,7 +461,7 @@ struct LLM_TN {
|
|||||||
} else if (req) { \
|
} else if (req) { \
|
||||||
throw std::runtime_error(format("key not found in model: %s", skey.c_str())); \
|
throw std::runtime_error(format("key not found in model: %s", skey.c_str())); \
|
||||||
} \
|
} \
|
||||||
}
|
} while (0)
|
||||||
|
|
||||||
//
|
//
|
||||||
// ggml helpers
|
// ggml helpers
|
||||||
@ -1913,7 +1913,7 @@ static void llm_load_hparams(
|
|||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
default: (void)0;
|
default: (void)0;
|
||||||
};
|
}
|
||||||
|
|
||||||
model.ftype = ml.ftype;
|
model.ftype = ml.ftype;
|
||||||
}
|
}
|
||||||
@ -2438,7 +2438,7 @@ static void llm_load_tensors(
|
|||||||
} break;
|
} break;
|
||||||
default:
|
default:
|
||||||
throw std::runtime_error("unknown architecture");
|
throw std::runtime_error("unknown architecture");
|
||||||
};
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ml.done_getting_tensors();
|
ml.done_getting_tensors();
|
||||||
@ -3981,7 +3981,7 @@ static struct ggml_cgraph * llama_build_graph(
|
|||||||
} break;
|
} break;
|
||||||
default:
|
default:
|
||||||
GGML_ASSERT(false);
|
GGML_ASSERT(false);
|
||||||
};
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -4626,7 +4626,7 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
|
|||||||
llm_tokenizer_bpe tokenizer(vocab);
|
llm_tokenizer_bpe tokenizer(vocab);
|
||||||
tokenizer.tokenize(raw_text, output);
|
tokenizer.tokenize(raw_text, output);
|
||||||
} break;
|
} break;
|
||||||
};
|
}
|
||||||
|
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
@ -7520,7 +7520,7 @@ int llama_token_to_piece(const struct llama_model * model, llama_token token, ch
|
|||||||
buf[2] = '\x85';
|
buf[2] = '\x85';
|
||||||
return 3;
|
return 3;
|
||||||
} else if (llama_is_control_token(model->vocab, token)) {
|
} else if (llama_is_control_token(model->vocab, token)) {
|
||||||
;
|
// do nothing
|
||||||
} else if (llama_is_byte_token(model->vocab, token)) {
|
} else if (llama_is_byte_token(model->vocab, token)) {
|
||||||
if (length < 1) {
|
if (length < 1) {
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -43,7 +43,7 @@ static_assert(QK4_1 == QK8_0, "QK4_1 and QK8_0 must be the same");
|
|||||||
static_assert(QK4_0 == QK8_0, "QK4_0 and QK8_0 must be the same");
|
static_assert(QK4_0 == QK8_0, "QK4_0 and QK8_0 must be the same");
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void fillQ4blocks(std::vector<T>& blocks, std::mt19937& rndm) {
|
static void fillQ4blocks(std::vector<T>& blocks, std::mt19937& rndm) {
|
||||||
for (auto& b : blocks) {
|
for (auto& b : blocks) {
|
||||||
b.d = 1;
|
b.d = 1;
|
||||||
for (int i=0; i<QK4_1/2; ++i) {
|
for (int i=0; i<QK4_1/2; ++i) {
|
||||||
@ -54,7 +54,7 @@ void fillQ4blocks(std::vector<T>& blocks, std::mt19937& rndm) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void fillQ80blocks(std::vector<block_q8_0>& blocks, std::mt19937& rndm) {
|
static void fillQ80blocks(std::vector<block_q8_0>& blocks, std::mt19937& rndm) {
|
||||||
for (auto& b : blocks) {
|
for (auto& b : blocks) {
|
||||||
b.d = 1;
|
b.d = 1;
|
||||||
int sum = 0;
|
int sum = 0;
|
||||||
@ -66,7 +66,7 @@ void fillQ80blocks(std::vector<block_q8_0>& blocks, std::mt19937& rndm) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
float simpleDot(const block_q4_0& x, const block_q8_0& y) {
|
static float simpleDot(const block_q4_0& x, const block_q8_0& y) {
|
||||||
int s1 = 0; //, s2 = 0;
|
int s1 = 0; //, s2 = 0;
|
||||||
for (int i=0; i<QK4_1/2; i+=2) {
|
for (int i=0; i<QK4_1/2; i+=2) {
|
||||||
int v1 = x.qs[i+0] & 0xf;
|
int v1 = x.qs[i+0] & 0xf;
|
||||||
@ -81,7 +81,7 @@ float simpleDot(const block_q4_0& x, const block_q8_0& y) {
|
|||||||
//return y.d * x.d * (s1 - 8 * s2);
|
//return y.d * x.d * (s1 - 8 * s2);
|
||||||
}
|
}
|
||||||
|
|
||||||
float simpleDot(const block_q4_1& x, const block_q8_0& y) {
|
static float simpleDot(const block_q4_1& x, const block_q8_0& y) {
|
||||||
int s1 = 0; //, s2 = 0;
|
int s1 = 0; //, s2 = 0;
|
||||||
for (int i=0; i<QK4_1/2; i+=2) {
|
for (int i=0; i<QK4_1/2; i+=2) {
|
||||||
int v1 = x.qs[i+0] & 0xf;
|
int v1 = x.qs[i+0] & 0xf;
|
||||||
|
@ -107,7 +107,7 @@ static struct ggml_tensor * get_random_tensor_f32(
|
|||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(false);
|
assert(false);
|
||||||
};
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -155,7 +155,7 @@ static struct ggml_tensor * get_random_tensor_f16(
|
|||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(false);
|
assert(false);
|
||||||
};
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -203,7 +203,7 @@ static struct ggml_tensor * get_random_tensor_i32(
|
|||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(false);
|
assert(false);
|
||||||
};
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -101,7 +101,7 @@ static struct ggml_tensor * get_random_tensor(
|
|||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(false);
|
assert(false);
|
||||||
};
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -124,7 +124,7 @@ int main(void) {
|
|||||||
struct ggml_context * ctx = ggml_init(params);
|
struct ggml_context * ctx = ggml_init(params);
|
||||||
|
|
||||||
int64_t ne1[4] = {4, 128, 1, 1};
|
int64_t ne1[4] = {4, 128, 1, 1};
|
||||||
int64_t ne2[4] = {4, 256, 1, 1};;
|
int64_t ne2[4] = {4, 256, 1, 1};
|
||||||
int64_t ne3[4] = {128, 256, 1, 1};
|
int64_t ne3[4] = {128, 256, 1, 1};
|
||||||
|
|
||||||
struct ggml_tensor * a = get_random_tensor(ctx, 2, ne1, -1, +1);
|
struct ggml_tensor * a = get_random_tensor(ctx, 2, ne1, -1, +1);
|
||||||
|
Loading…
Reference in New Issue
Block a user