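build fixes: add extern to GGML_API and use it for ggml_table_f32_f16, stop forcing ggml-base to be STATIC, define GGML_BUILD on ggml-base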

2025-01-10 18:51:45 +00:00 · 2024-11-12 02:32:22 +01:00 · 2024-11-12 02:32:22 +01:00 · ab26fb9005
commit ab26fb9005
parent bf79cb3972
9 changed files with 175 additions and 236 deletions
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@ -176,15 +176,15 @@
 #ifdef GGML_SHARED
 #    if defined(_WIN32) && !defined(__MINGW32__)
 #        ifdef GGML_BUILD
-#            define GGML_API __declspec(dllexport)
+#            define GGML_API __declspec(dllexport) extern
 #        else
-#            define GGML_API __declspec(dllimport)
+#            define GGML_API __declspec(dllimport) extern
 #        endif
 #    else
-#        define GGML_API __attribute__ ((visibility ("default")))
+#        define GGML_API __attribute__ ((visibility ("default"))) extern
 #    endif
 #else
-#    define GGML_API
+#    define GGML_API extern
 #endif
 // TODO: support for clang
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@ -29,37 +29,6 @@ endif()
 unset(GGML_EXTRA_LIBS_PRIVATE)
 unset(GGML_EXTRA_LIBS_PUBLIC)
 # musa, hip: add directory with a CMakeLists.txt file, but no source files (refer to the ggml-cuda files as ../ggml-cuda)
 if (GGML_MUSA)
    list(APPEND CMAKE_MODULE_PATH "/usr/local/musa/cmake/")
    find_package(MUSAToolkit)
    set(CUDAToolkit_FOUND ${MUSAToolkit_FOUND})
 else()
    find_package(CUDAToolkit)
 endif()
 # if (GGML_MUSA)
 # set(CMAKE_CUDA_COMPILER ${MUSAToolkit_MCC_EXECUTABLE})
 # else()
 # if (GGML_MUSA)
 # set_source_files_properties(${GGML_SOURCES_CUDA} PROPERTIES LANGUAGE CXX)
 # foreach(SOURCE ${GGML_SOURCES_CUDA})
 #     set_property(SOURCE ${SOURCE} PROPERTY COMPILE_FLAGS "-x musa -mtgpu --cuda-gpu-arch=mp_21 --cuda-gpu-arch=mp_22")
 # endforeach()
 # endif()
 if (GGML_MUSA)
    set(CMAKE_C_COMPILER clang)
    set(CMAKE_C_EXTENSIONS OFF)
    set(CMAKE_CXX_COMPILER clang++)
    set(CMAKE_CXX_EXTENSIONS OFF)
    set(GGML_CUDA ON)
    list(APPEND GGML_CDEF_PUBLIC GGML_USE_MUSA)
 endif()
 if (GGML_AMX)
    if (CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 11.0)
    else()
@ -597,12 +566,6 @@ function(get_flags CCID CCVER)
    elseif (CCID STREQUAL "GNU")
        set(C_FLAGS   -Wdouble-promotion)
        set(CXX_FLAGS -Wno-array-bounds)
        if (NOT GGML_MUSA)
            if (CCVER VERSION_GREATER_EQUAL 7.1.0)
                list(APPEND CXX_FLAGS -Wno-format-truncation)
            endif()
        endif()
        if (CCVER VERSION_GREATER_EQUAL 8.1.0)
            list(APPEND CXX_FLAGS -Wextra-semi)
        endif()
@ -768,7 +731,7 @@ endif()
 # ggml
-add_library(ggml-base STATIC
+add_library(ggml-base
            ../include/ggml.h
            ../include/ggml-alloc
            ../include/ggml-alloc.h
@ -779,18 +742,16 @@ add_library(ggml-base STATIC
            ggml-backend.cpp
            ggml-threading.cpp
            ggml-threading.h
-            ggml-quants.c # for quantize functions TODO: move dot fns to a separate file
+            ggml-quants.c
            ggml-quants.h
            ggml-aarch64.c
            ggml-aarch64.h
            )
 add_subdirectory(ggml-cpu)
 add_library(ggml
            ggml-backend-reg.cpp
            )
 add_subdirectory(ggml-cpu)
 target_link_libraries(ggml PUBLIC ggml-base ggml-cpu)
@ -850,5 +811,6 @@ target_link_libraries(ggml PRIVATE ${GGML_EXTRA_LIBS_PRIVATE} PUBLIC ${GGML_EXTR
 if (BUILD_SHARED_LIBS)
    set_target_properties(ggml-base PROPERTIES POSITION_INDEPENDENT_CODE ON)
    set_target_properties(ggml      PROPERTIES POSITION_INDEPENDENT_CODE ON)
-    target_compile_definitions(ggml PRIVATE GGML_SHARED GGML_BUILD)
+    target_compile_definitions(ggml-base PRIVATE GGML_BUILD)
    target_compile_definitions(ggml-base PUBLIC  GGML_SHARED)
 endif()
--- a/ggml/src/ggml-backend-reg.cpp
+++ b/ggml/src/ggml-backend-reg.cpp
@ -1,5 +1,7 @@
 #include "ggml-backend.h"
 #include "ggml-backend-impl.h"
 #include "ggml-backend.h"
 #include "ggml-cpu.h"
 #include "ggml-impl.h"
 #include <cstring>
 #include <vector>
@ -45,8 +47,6 @@
 #include "ggml-kompute.h"
 #endif
 #include "ggml-cpu.h"
 struct ggml_backend_registry {
    std::vector<ggml_backend_reg_t> backends;
    std::vector<ggml_backend_dev_t> devices;
--- a/ggml/src/ggml-cpu/ggml-cpu-quants.c
+++ b/ggml/src/ggml-cpu/ggml-cpu-quants.c
@ -10783,22 +10783,6 @@ void ggml_vec_dot_iq4_xs_q8_K(int n, float * restrict s, size_t bs, const void *
 #endif
 }
 //
 // ============================================= 3-bit using D4 lattice
 //
 void quantize_row_iq3_xxs(const float * restrict x, void * restrict vy, int64_t k) {
    assert(k % QK_K == 0);
    block_iq3_xxs * restrict y = vy;
    quantize_row_iq3_xxs_ref(x, y, k);
 }
 void quantize_row_iq3_s(const float * restrict x, void * restrict vy, int64_t k) {
    assert(k % QK_K == 0);
    block_iq3_s * restrict y = vy;
    quantize_row_iq3_s_ref(x, y, k);
 }
 // ============================ 4-bit non-linear quants
 void quantize_row_iq4_nl(const float * restrict x, void * restrict y, int64_t k) {
@ -10810,10 +10794,3 @@ void quantize_row_iq4_xs(const float * restrict x, void * restrict y, int64_t k)
    assert(k % QK_K == 0);
    quantize_iq4_xs(x, y, 1, k, NULL);
 }
 // =============================== 2.5625 bpw
 void quantize_row_iq2_s(const float * restrict x, void * restrict y, int64_t k) {
    assert(k % QK_K == 0);
    quantize_iq2_s(x, y, 1, k, NULL);
 }
--- a/ggml/src/ggml-cpu/ggml-cpu-quants.h
+++ b/ggml/src/ggml-cpu/ggml-cpu-quants.h
@ -29,11 +29,8 @@ void quantize_row_q8_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, in
 void quantize_row_tq1_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
 void quantize_row_tq2_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
 void quantize_row_iq3_xxs(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
 void quantize_row_iq4_nl (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
 void quantize_row_iq4_xs (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
 void quantize_row_iq3_s  (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
 void quantize_row_iq2_s  (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
 // Dot product
 void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
--- a/ggml/src/ggml-cpu/ggml-cpu.c
+++ b/ggml/src/ggml-cpu/ggml-cpu.c
@ -356,19 +356,20 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
        .nrows                    = 1,
    },
    [GGML_TYPE_IQ3_XXS] = {
-        .from_float               = quantize_row_iq3_xxs,
+        // NOTE: from_float for iq3 and iq2_s was removed because these quants require initialization in ggml_quantize_init
        //.from_float               = quantize_row_iq3_xxs,
        .vec_dot                  = ggml_vec_dot_iq3_xxs_q8_K,
        .vec_dot_type             = GGML_TYPE_Q8_K,
        .nrows                    = 1,
    },
    [GGML_TYPE_IQ3_S] = {
-        .from_float               = quantize_row_iq3_s,
+        //.from_float               = quantize_row_iq3_s,
        .vec_dot                  = ggml_vec_dot_iq3_s_q8_K,
        .vec_dot_type             = GGML_TYPE_Q8_K,
        .nrows                    = 1,
    },
    [GGML_TYPE_IQ2_S] = {
-        .from_float               = quantize_row_iq2_s,
+        //.from_float               = quantize_row_iq2_s,
        .vec_dot                  = ggml_vec_dot_iq2_s_q8_K,
        .vec_dot_type             = GGML_TYPE_Q8_K,
        .nrows                    = 1,
--- a/ggml/src/ggml-cpu/ggml-cpu.cpp
+++ b/ggml/src/ggml-cpu/ggml-cpu.cpp
@ -11,6 +11,14 @@
 #include <sys/sysctl.h>
 #endif
 #if defined(_WIN32)
 #define WIN32_LEAN_AND_MEAN
 #ifndef NOMINMAX
    #define NOMINMAX
 #endif
 #include <windows.h>
 #endif
 // ggml-backend interface
 #ifdef GGML_USE_CPU_HBM
--- a/ggml/src/ggml-impl.h
+++ b/ggml/src/ggml-impl.h
@ -3,7 +3,6 @@
 // GGML internal header
 #include "ggml.h"
 #include <assert.h>
 #include <math.h>
 #include <stdlib.h> // load `stdlib.h` before other headers to work around MinGW bug: https://sourceforge.net/p/mingw-w64/bugs/192/
@ -11,6 +10,18 @@
 #include <stdint.h>
 #include <string.h>
 #ifdef __ARM_FEATURE_SVE
 #include <arm_sve.h>
 #endif // __ARM_FEATURE_SVE
 #if defined(__ARM_NEON)
 // if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
 //
 //   $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
 //
 #include <arm_neon.h>
 #endif
 #ifdef __cplusplus
 extern "C" {
 #endif
@ -121,14 +132,12 @@ struct ggml_map_custom1_op_params {
    void             * userdata;
 };
 struct ggml_map_custom2_op_params {
    ggml_custom2_op_t   fun;
    int                 n_tasks;
    void              * userdata;
 };
 struct ggml_map_custom3_op_params {
    ggml_custom3_op_t fun;
    int n_tasks;
@ -291,13 +300,6 @@ void ggml_aligned_free(void * ptr, size_t size);
 // FP16 to FP32 conversion
 #if defined(__ARM_NEON)
 // if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
 //
 //   $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
 //
 #include <arm_neon.h>
    #ifdef _MSC_VER
        typedef uint16_t ggml_fp16_internal_t;
    #else
@ -306,7 +308,6 @@ typedef __fp16 ggml_fp16_internal_t;
 #endif
 #if defined(__ARM_NEON) && !defined(_MSC_VER)
    #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
    #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
@ -325,9 +326,7 @@ static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
        return res;
    }
-#else
+#elif defined(__F16C__)
 #ifdef __F16C__
    #ifdef _MSC_VER
        #define GGML_COMPUTE_FP16_TO_FP32(x) _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(x)))
@ -399,7 +398,7 @@ static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
        const uint32_t two_w = w + w;
        const uint32_t exp_offset = UINT32_C(0xE0) << 23;
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
+    #if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)) && (!defined(__cplusplus) || __cplusplus >= 201703L)
        const float exp_scale = 0x1.0p-112f;
    #else
        const float exp_scale = fp32_from_bits(UINT32_C(0x7800000));
@ -417,7 +416,7 @@ static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
    }
    static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
+    #if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)) && (!defined(__cplusplus) || __cplusplus >= 201703L)
        const float scale_to_inf = 0x1.0p+112f;
        const float scale_to_zero = 0x1.0p-110f;
    #else
@ -445,17 +444,11 @@ static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
    #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
    #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
 #endif // __F16C__
 #endif // defined(__ARM_NEON) && !defined(_MSC_VER)
 #ifdef __ARM_FEATURE_SVE
 #include <arm_sve.h>
 #endif // __ARM_FEATURE_SVE
 // precomputed f32 table for f16 (256 KB)
 // defined in ggml.c, initialized in ggml_init()
-extern float ggml_table_f32_f16[1 << 16];
+GGML_API float ggml_table_f32_f16[1 << 16];
 // On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
 // so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -29,5 +29,6 @@ target_link_libraries(llama PUBLIC ggml)
 if (BUILD_SHARED_LIBS)
    set_target_properties(llama PROPERTIES POSITION_INDEPENDENT_CODE ON)
-    target_compile_definitions(llama PRIVATE LLAMA_SHARED LLAMA_BUILD)
+    target_compile_definitions(llama PRIVATE LLAMA_BUILD)
    target_compile_definitions(llama PUBLIC  LLAMA_SHARED)
 endif()