diff --git a/cmake/arm64-windows-llvm.cmake b/cmake/arm64-windows-llvm.cmake
index 46fba6514..802379680 100644
--- a/cmake/arm64-windows-llvm.cmake
+++ b/cmake/arm64-windows-llvm.cmake
@@ -9,7 +9,7 @@ set( CMAKE_CXX_COMPILER  clang++ )
 set( CMAKE_C_COMPILER_TARGET   ${target} )
 set( CMAKE_CXX_COMPILER_TARGET ${target} )
 
-set( arch_c_flags "-march=armv8.7-a -fvectorize -ffp-model=fast" )
+set( arch_c_flags "-march=armv8.7-a -fvectorize -ffp-model=fast -fno-finite-math-only" )
 set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gnu-zero-variadic-macro-arguments" )
 
 set( CMAKE_C_FLAGS_INIT   "${arch_c_flags} ${warn_c_flags}" )
diff --git a/ggml.c b/ggml.c
index 000d9db7e..8869e146a 100644
--- a/ggml.c
+++ b/ggml.c
@@ -2272,6 +2272,11 @@ inline static float ggml_silu_f32(float x) {
     return x/(1.0f + expf(-x));
 }
 
+#if defined(__FINITE_MATH_ONLY__) && __FINITE_MATH_ONLY__ // defined() check avoids evaluating an undefined macro in #if
+#error "some routines in ggml.c require non-finite math arithmetics -- pass -fno-finite-math-only to the compiler to fix"
+#error "ref: https://github.com/ggerganov/llama.cpp/pull/7154#issuecomment-2143844461"
+#endif // __FINITE_MATH_ONLY__
+
 #if defined(__ARM_NEON) && defined(__aarch64__)
 
 // adapted from arm limited optimized routine