diff --git a/examples/common.cpp b/examples/common.cpp
index a6abc4977..a4fea4af4 100644
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -749,7 +749,7 @@ bool console_readline(console_state & con_st, std::string & line) {
             break;
         }
 
-        if (input_char == WEOF || input_char == 0x04 /* Ctrl+D*/) {
+        if (input_char == (char32_t) WEOF || input_char == 0x04 /* Ctrl+D*/) {
             end_of_stream = true;
             break;
         }
@@ -764,7 +764,7 @@ bool console_readline(console_state & con_st, std::string & line) {
             char32_t code = getchar32();
             if (code == '[' || code == 0x1B) {
                 // Discard the rest of the escape sequence
-                while ((code = getchar32()) != WEOF) {
+                while ((code = getchar32()) != (char32_t) WEOF) {
                     if ((code >= 'A' && code <= 'Z') || (code >= 'a' && code <= 'z') || code == '~') {
                         break;
                     }
diff --git a/examples/common.h b/examples/common.h
index 2ad20ba50..2b66382a6 100644
--- a/examples/common.h
+++ b/examples/common.h
@@ -44,15 +44,15 @@ struct gpt_params {
     float   mirostat_tau      = 5.00f;  // target entropy
     float   mirostat_eta      = 0.10f;  // learning rate
 
-    std::string model = "models/7B/ggml-model.bin"; // model path
-    std::string prompt = "";
+    std::string model             = "models/7B/ggml-model.bin"; // model path
+    std::string prompt            = "";
     std::string path_prompt_cache = "";  // path to file for saving/loading prompt eval state
     std::string input_prefix      = "";  // string to prefix user inputs with
     std::string input_suffix      = "";  // string to suffix user inputs with
     std::vector<std::string> antiprompt; // string upon seeing which more user input is prompted
 
     std::string lora_adapter = "";  // lora adapter path
-    std::string lora_base = "";     // base model path for the lora adapter
+    std::string lora_base    = "";  // base model path for the lora adapter
 
     bool memory_f16        = true;  // use f16 instead of f32 for memory kv
     bool random_prompt     = false; // do not randomize prompt if none provided
diff --git a/llama.cpp b/llama.cpp
index 1f9d37844..1802d2319 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -941,7 +941,7 @@ static void llama_model_load_internal(
     size_t ctx_size;
     size_t mmapped_size;
    ml->calc_sizes(&ctx_size, &mmapped_size);
-    fprintf(stderr, "%s: ggml ctx size = %6.2f KB\n", __func__, ctx_size/1024.0);
+    fprintf(stderr, "%s: ggml ctx size = %6.2f MB\n", __func__, ctx_size/1024.0/1024.0);
 
     // print memory requirements
     {
diff --git a/tests/test-sampling.cpp b/tests/test-sampling.cpp
index 9174c1e37..ebfc17c18 100644
--- a/tests/test-sampling.cpp
+++ b/tests/test-sampling.cpp
@@ -1,14 +1,16 @@
-#include "llama.h"
 #include "ggml.h"
-#include <cassert>
-#include <math.h>
+#include "llama.h"
+
+#ifdef NDEBUG
+#undef NDEBUG
+#endif
+
 #include <cmath>
 #include <numeric>
 #include <cassert>
 #include <vector>
 #include <algorithm>
 
-
 void dump(const llama_token_data_array * candidates) {
     for (size_t i = 0; i < candidates->size; i++) {
         printf("%d: %f (%f)\n", candidates->data[i].id, candidates->data[i].p, candidates->data[i].logit);