diff --git a/ggml-impl.h b/ggml-impl.h
index 3579018be..e551786e8 100644
--- a/ggml-impl.h
+++ b/ggml-impl.h
@@ -4,6 +4,7 @@
 
 // GGML internal header
 
+#include <assert.h>
 #include <stddef.h>
 #include <stdbool.h>
 #include <string.h> // memcpy
diff --git a/ggml-quants.h b/ggml-quants.h
index d88f99e33..70c12c274 100644
--- a/ggml-quants.h
+++ b/ggml-quants.h
@@ -1,22 +1,12 @@
 #pragma once
 
-// This is a private API for quantization and dequantization
-// Should not be used directly, use ggml.h instead
+#include "ggml-impl.h"
 
-#include "ggml.h"
+// GGML internal header: private quantization/dequantization API - do not use directly, use ggml.h instead
 
 #include <stdint.h>
-#include <assert.h>
 #include <stddef.h>
 
-#ifndef static_assert
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L)
-#define static_assert(cond, msg) _Static_assert(cond, msg)
-#else
-#define static_assert(cond, msg) struct global_scope_noop_trick
-#endif
-#endif
-
 #define QK4_0 32
 typedef struct {
     ggml_fp16_t d;          // delta
diff --git a/llama.cpp b/llama.cpp
index a4340d527..e599917a8 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1467,7 +1467,7 @@ static int32_t llama_kv_cache_cell_max(const struct llama_kv_cache & cache) {
 }
 
 static void llama_kv_cache_clear(struct llama_kv_cache & cache) {
-    for (int32_t i = 0; i < cache.size; ++i) {
+    for (int32_t i = 0; i < (int32_t) cache.size; ++i) {
         cache.cells[i].pos = -1;
         cache.cells[i].seq_id.clear();
     }