diff --git a/ggml-impl.h b/ggml-impl.h index 3579018be..e551786e8 100644 --- a/ggml-impl.h +++ b/ggml-impl.h @@ -4,6 +4,7 @@ // GGML internal header +#include #include #include #include // memcpy diff --git a/ggml-quants.h b/ggml-quants.h index d88f99e33..70c12c274 100644 --- a/ggml-quants.h +++ b/ggml-quants.h @@ -1,22 +1,12 @@ #pragma once -// This is a private API for quantization and dequantization -// Should not be used directly, use ggml.h instead +#include "ggml-impl.h" -#include "ggml.h" +// GGML internal header #include -#include #include -#ifndef static_assert -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L) -#define static_assert(cond, msg) _Static_assert(cond, msg) -#else -#define static_assert(cond, msg) struct global_scope_noop_trick -#endif -#endif - #define QK4_0 32 typedef struct { ggml_fp16_t d; // delta diff --git a/llama.cpp b/llama.cpp index a4340d527..e599917a8 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1467,7 +1467,7 @@ static int32_t llama_kv_cache_cell_max(const struct llama_kv_cache & cache) { } static void llama_kv_cache_clear(struct llama_kv_cache & cache) { - for (int32_t i = 0; i < cache.size; ++i) { + for (int32_t i = 0; i < (int32_t) cache.size; ++i) { cache.cells[i].pos = -1; cache.cells[i].seq_id.clear(); }