mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 19:50:17 +00:00
build : fix most gcc and clang warnings (#2861)
* fix most gcc and clang warnings
* baby-llama : remove commented opt_params_adam
* fix some MinGW warnings
* fix more MinGW warnings
This commit is contained in:
parent
d8d6977f48
commit
ef15649972
@ -403,6 +403,7 @@ if (LLAMA_ALL_WARNINGS)
|
|||||||
-Wpointer-arith
|
-Wpointer-arith
|
||||||
-Wmissing-prototypes
|
-Wmissing-prototypes
|
||||||
-Werror=implicit-int
|
-Werror=implicit-int
|
||||||
|
-Wno-unused-function
|
||||||
)
|
)
|
||||||
set(cxx_flags
|
set(cxx_flags
|
||||||
-Wall
|
-Wall
|
||||||
@ -412,6 +413,10 @@ if (LLAMA_ALL_WARNINGS)
|
|||||||
-Wno-unused-function
|
-Wno-unused-function
|
||||||
-Wno-multichar
|
-Wno-multichar
|
||||||
)
|
)
|
||||||
|
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||||
|
# g++ only
|
||||||
|
set(cxx_flags ${cxx_flags} -Wno-format-truncation)
|
||||||
|
endif()
|
||||||
else()
|
else()
|
||||||
# todo : msvc
|
# todo : msvc
|
||||||
endif()
|
endif()
|
||||||
|
7
Makefile
7
Makefile
@ -91,9 +91,14 @@ endif # LLAMA_DISABLE_LOGS
|
|||||||
|
|
||||||
# warnings
|
# warnings
|
||||||
CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith \
|
CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith \
|
||||||
-Wmissing-prototypes -Werror=implicit-int
|
-Wmissing-prototypes -Werror=implicit-int -Wno-unused-function
|
||||||
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar
|
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar
|
||||||
|
|
||||||
|
ifeq '' '$(findstring clang++,$(CXX))'
|
||||||
|
# g++ only
|
||||||
|
CXXFLAGS += -Wno-format-truncation
|
||||||
|
endif
|
||||||
|
|
||||||
# OS specific
|
# OS specific
|
||||||
# TODO: support Windows
|
# TODO: support Windows
|
||||||
ifeq ($(UNAME_S),Linux)
|
ifeq ($(UNAME_S),Linux)
|
||||||
|
@ -24,7 +24,9 @@
|
|||||||
|
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
#define WIN32_LEAN_AND_MEAN
|
#define WIN32_LEAN_AND_MEAN
|
||||||
|
#ifndef NOMINMAX
|
||||||
# define NOMINMAX
|
# define NOMINMAX
|
||||||
|
#endif
|
||||||
#include <codecvt>
|
#include <codecvt>
|
||||||
#include <locale>
|
#include <locale>
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
@ -1027,7 +1029,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
|
|||||||
dump_string_yaml_multiline(stream, "grammar", params.grammar.c_str());
|
dump_string_yaml_multiline(stream, "grammar", params.grammar.c_str());
|
||||||
fprintf(stream, "grammar-file: # never logged, see grammar instead. Can still be specified for input.\n");
|
fprintf(stream, "grammar-file: # never logged, see grammar instead. Can still be specified for input.\n");
|
||||||
fprintf(stream, "hellaswag: %s # default: false\n", params.hellaswag ? "true" : "false");
|
fprintf(stream, "hellaswag: %s # default: false\n", params.hellaswag ? "true" : "false");
|
||||||
fprintf(stream, "hellaswag_tasks: %ld # default: 400\n", params.hellaswag_tasks);
|
fprintf(stream, "hellaswag_tasks: %zu # default: 400\n", params.hellaswag_tasks);
|
||||||
|
|
||||||
const auto logit_bias_eos = params.logit_bias.find(llama_token_eos(lctx));
|
const auto logit_bias_eos = params.logit_bias.find(llama_token_eos(lctx));
|
||||||
const bool ignore_eos = logit_bias_eos != params.logit_bias.end() && logit_bias_eos->second == -INFINITY;
|
const bool ignore_eos = logit_bias_eos != params.logit_bias.end() && logit_bias_eos->second == -INFINITY;
|
||||||
|
@ -235,6 +235,7 @@ namespace console {
|
|||||||
|
|
||||||
int estimateWidth(char32_t codepoint) {
|
int estimateWidth(char32_t codepoint) {
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
|
(void)codepoint;
|
||||||
return 1;
|
return 1;
|
||||||
#else
|
#else
|
||||||
return wcwidth(codepoint);
|
return wcwidth(codepoint);
|
||||||
|
@ -1617,15 +1617,10 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
float error_before_opt = ggml_get_f32_1d(e, 0);
|
float error_before_opt = ggml_get_f32_1d(e, 0);
|
||||||
|
|
||||||
struct ggml_opt_params opt_params_adam = ggml_opt_default_params(GGML_OPT_ADAM);
|
|
||||||
struct ggml_opt_params opt_params_lbfgs = ggml_opt_default_params(GGML_OPT_LBFGS);
|
struct ggml_opt_params opt_params_lbfgs = ggml_opt_default_params(GGML_OPT_LBFGS);
|
||||||
opt_params_adam.print_forward_graph = false;
|
|
||||||
opt_params_adam.print_backward_graph = false;
|
|
||||||
opt_params_lbfgs.print_forward_graph = false;
|
opt_params_lbfgs.print_forward_graph = false;
|
||||||
opt_params_lbfgs.print_backward_graph = false;
|
opt_params_lbfgs.print_backward_graph = false;
|
||||||
opt_params_adam.adam.n_iter = 16;
|
|
||||||
opt_params_lbfgs.lbfgs.n_iter = 16;
|
opt_params_lbfgs.lbfgs.n_iter = 16;
|
||||||
// ggml_opt(ctx0, opt_params_adam, e);
|
|
||||||
ggml_opt(ctx0, opt_params_lbfgs, e);
|
ggml_opt(ctx0, opt_params_lbfgs, e);
|
||||||
//
|
//
|
||||||
ggml_build_forward_expand(&gf, e);
|
ggml_build_forward_expand(&gf, e);
|
||||||
|
@ -22,7 +22,9 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#elif defined (_WIN32)
|
#elif defined (_WIN32)
|
||||||
#define WIN32_LEAN_AND_MEAN
|
#define WIN32_LEAN_AND_MEAN
|
||||||
|
#ifndef NOMINMAX
|
||||||
# define NOMINMAX
|
# define NOMINMAX
|
||||||
|
#endif
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
#endif
|
#endif
|
||||||
@ -73,7 +75,7 @@ void beam_search_callback(void * callback_data_ptr, llama_beams_state beams_stat
|
|||||||
assert(0u < beams_state.n_beams);
|
assert(0u < beams_state.n_beams);
|
||||||
const llama_token * tokens = beams_state.beam_views[0].tokens;
|
const llama_token * tokens = beams_state.beam_views[0].tokens;
|
||||||
std::copy(tokens, tokens + n, callback_data.response.end() - n);
|
std::copy(tokens, tokens + n, callback_data.response.end() - n);
|
||||||
printf("%lu", n);
|
printf("%zu", n);
|
||||||
}
|
}
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
#if 1 // DEBUG: print current beams for this iteration
|
#if 1 // DEBUG: print current beams for this iteration
|
||||||
@ -145,7 +147,7 @@ int main(int argc, char ** argv)
|
|||||||
|
|
||||||
if (tokens_list.size() > max_tokens_list_size)
|
if (tokens_list.size() > max_tokens_list_size)
|
||||||
{
|
{
|
||||||
fprintf( stderr , "%s: error: prompt too long (%lu tokens, max %lu)\n" ,
|
fprintf( stderr , "%s: error: prompt too long (%zu tokens, max %zu)\n" ,
|
||||||
__func__ , tokens_list.size() , max_tokens_list_size );
|
__func__ , tokens_list.size() , max_tokens_list_size );
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -17,6 +17,8 @@
|
|||||||
#include "completion.js.hpp"
|
#include "completion.js.hpp"
|
||||||
#include "json-schema-to-grammar.mjs.hpp"
|
#include "json-schema-to-grammar.mjs.hpp"
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
|
||||||
#ifndef SERVER_VERBOSE
|
#ifndef SERVER_VERBOSE
|
||||||
#define SERVER_VERBOSE 1
|
#define SERVER_VERBOSE 1
|
||||||
#endif
|
#endif
|
||||||
@ -1038,7 +1040,7 @@ static json format_timings(llama_server_context &llama)
|
|||||||
{
|
{
|
||||||
const auto timings = llama_get_timings(llama.ctx);
|
const auto timings = llama_get_timings(llama.ctx);
|
||||||
|
|
||||||
assert(timings.n_eval == llama.num_tokens_predicted);
|
assert(timings.n_eval == ptrdiff_t(llama.num_tokens_predicted));
|
||||||
|
|
||||||
return json{
|
return json{
|
||||||
{"prompt_n", timings.n_p_eval},
|
{"prompt_n", timings.n_p_eval},
|
||||||
@ -1239,7 +1241,7 @@ void beam_search_callback(void * callback_data, llama_beams_state beams_state) {
|
|||||||
const llama_token * tokens = beams_state.beam_views[0].tokens;
|
const llama_token * tokens = beams_state.beam_views[0].tokens;
|
||||||
const auto map = [](llama_token tok) { return completion_token_output{{},tok}; };
|
const auto map = [](llama_token tok) { return completion_token_output{{},tok}; };
|
||||||
std::transform(tokens, tokens + n, llama.generated_token_probs.end() - n, map);
|
std::transform(tokens, tokens + n, llama.generated_token_probs.end() - n, map);
|
||||||
printf("%lu", n);
|
printf("%zu", n);
|
||||||
}
|
}
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
#if 0 // DEBUG: print current beams for this iteration
|
#if 0 // DEBUG: print current beams for this iteration
|
||||||
@ -1548,7 +1550,7 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
svr.set_exception_handler([](const Request &, Response &res, std::exception_ptr ep)
|
svr.set_exception_handler([](const Request &, Response &res, std::exception_ptr ep)
|
||||||
{
|
{
|
||||||
const auto * fmt = "500 Internal Server Error\n%s";
|
const char fmt[] = "500 Internal Server Error\n%s";
|
||||||
char buf[BUFSIZ];
|
char buf[BUFSIZ];
|
||||||
try {
|
try {
|
||||||
std::rethrow_exception(std::move(ep));
|
std::rethrow_exception(std::move(ep));
|
||||||
|
@ -183,13 +183,9 @@ static float make_qkx1_quants(int n, int nmax, const float * restrict x, uint8_t
|
|||||||
int ntry, float alpha) {
|
int ntry, float alpha) {
|
||||||
float min = x[0];
|
float min = x[0];
|
||||||
float max = x[0];
|
float max = x[0];
|
||||||
float sum_x = 0;
|
|
||||||
float sum_x2 = 0;
|
|
||||||
for (int i = 1; i < n; ++i) {
|
for (int i = 1; i < n; ++i) {
|
||||||
if (x[i] < min) min = x[i];
|
if (x[i] < min) min = x[i];
|
||||||
if (x[i] > max) max = x[i];
|
if (x[i] > max) max = x[i];
|
||||||
sum_x += x[i];
|
|
||||||
sum_x2 += x[i]*x[i];
|
|
||||||
}
|
}
|
||||||
if (max == min) {
|
if (max == min) {
|
||||||
for (int i = 0; i < n; ++i) L[i] = 0;
|
for (int i = 0; i < n; ++i) L[i] = 0;
|
||||||
@ -2060,7 +2056,7 @@ void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
|
|
||||||
__m256 acc = _mm256_setzero_ps();
|
__m256 acc = _mm256_setzero_ps();
|
||||||
|
|
||||||
uint32_t *aux;
|
const uint32_t *aux;
|
||||||
|
|
||||||
for (int i = 0; i < nb; ++i) {
|
for (int i = 0; i < nb; ++i) {
|
||||||
|
|
||||||
@ -2070,7 +2066,7 @@ void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
const int8_t * restrict q8 = y[i].qs;
|
const int8_t * restrict q8 = y[i].qs;
|
||||||
|
|
||||||
// Set up scales
|
// Set up scales
|
||||||
aux = (uint32_t *)x[i].scales;
|
aux = (const uint32_t *)x[i].scales;
|
||||||
__m128i scales128 = _mm_set_epi32(
|
__m128i scales128 = _mm_set_epi32(
|
||||||
((aux[1] >> 4) & kmask2) | (((aux[2] >> 6) & kmask1) << 4),
|
((aux[1] >> 4) & kmask2) | (((aux[2] >> 6) & kmask1) << 4),
|
||||||
((aux[0] >> 4) & kmask2) | (((aux[2] >> 4) & kmask1) << 4),
|
((aux[0] >> 4) & kmask2) | (((aux[2] >> 4) & kmask1) << 4),
|
||||||
|
@ -3600,7 +3600,7 @@ static void llama_grammar_advance_stack(
|
|||||||
std::vector<std::vector<const llama_grammar_element *>> & new_stacks) {
|
std::vector<std::vector<const llama_grammar_element *>> & new_stacks) {
|
||||||
|
|
||||||
if (stack.empty()) {
|
if (stack.empty()) {
|
||||||
new_stacks.push_back(stack);
|
new_stacks.emplace_back(stack);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3637,7 +3637,7 @@ static void llama_grammar_advance_stack(
|
|||||||
}
|
}
|
||||||
case LLAMA_GRETYPE_CHAR:
|
case LLAMA_GRETYPE_CHAR:
|
||||||
case LLAMA_GRETYPE_CHAR_NOT:
|
case LLAMA_GRETYPE_CHAR_NOT:
|
||||||
new_stacks.push_back(stack);
|
new_stacks.emplace_back(stack);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
// end of alternate (LLAMA_GRETYPE_END, LLAMA_GRETYPE_ALT) or middle of char range
|
// end of alternate (LLAMA_GRETYPE_END, LLAMA_GRETYPE_ALT) or middle of char range
|
||||||
|
Loading…
Reference in New Issue
Block a user