mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-11-11 21:39:52 +00:00
0f1a39f343
* Arm AArch64: optimized GEMV and GEMM kernels for q4_0_q8_0, and q8_0_q8_0 quantization
* Arm AArch64: add optimized GEMV and GEMM asm kernels for q4_0_q8_0 quantization and refactor code to address llama.cpp pr#5780 suggestions
* Arm AArch64: add optimized GEMV and GEMM asm kernels for q4_0_q8_0 quantization and refactor code to address llama.cpp pr#5780 suggestions
* Arm AArch64: add optimized GEMV and GEMM asm kernels for q4_0_q8_0 quantization and refactor code to address llama.cpp pr#5780 suggestions
* Arm AArch64: add optimized GEMV and GEMM asm kernels for q4_0_q8_0 quantization and refactor code to address llama.cpp pr#5780 suggestions
* Arm AArch64: add copyright claim only to ggml-aarch64.cpp and ggml-aarch64.h files
* Arm AArch64: minor code refactoring for rebase
* Arm AArch64: minor code refactoring for resolving a build issue with cmake
* Arm AArch64: minor code refactoring to split the Q4_0_AARCH64 type into three separate types: Q4_0_4_4, Q4_0_4_8, and Q4_0_8_8
* Arm AArch64: minor code change for resolving a build issue with server-windows
* retrigger checks
* Arm AArch64: minor code changes for rebase
* Arm AArch64: minor changes to skip the pr#7433 vec_dot code for arm cpus with SVE VL not equal to 256 bits
* Arm AArch64: remove stale LLAMA_QKK_64 from CMakeLists.txt and delete build.zig
* Arm AArch64: add reference scalar gemm and gemv, and avoid dynamic memory allocations during quantization for Q4_0_4_4, Q4_0_4_8, and Q4_0_8_8
* Arm AArch64: add multithreaded quantization support for the new types: Q4_0_4_4, Q4_0_4_8, and Q4_0_8_8
* Arm AArch64: minor code refactoring
* Arm AArch64: simplify logic for calling gemm and gemv functions in ggml_compute_forward_mul_mat
* Arm AArch64: minimize changes in ggml_compute_forward_mul_mat
* Arm AArch64: minor code refactoring, and add reference scalar code to quantize routines for new quant types
* Arm AArch64: minor code refactoring
* Arm AArch64: minor code refactoring
* Arm AArch64: minor code refactoring
* rebase on the latest master commit 3fd62a6
and adapt to the new directory structure
* Arm AArch64: remove a redundant comment
* Arm AArch64: add pragma in ggml-aarch64.c to turn -Woverlength-strings warning off
* Arm AArch64: use __aarch64__ check to guard 64-bit neon kernels
* Arm AArch64: update docs/build.md README to include compile time flags for building the Q4_0_4_4 quant type
77 lines
1.9 KiB
Swift
77 lines
1.9 KiB
Swift
// swift-tools-version:5.5
|
|
|
|
import PackageDescription
|
|
|
|
// Source files compiled into the `llama` target on every platform.
// Declared `var` because the platform-specific sections of this manifest
// append to it.
var sources = [
    "src/llama.cpp",
    "src/unicode.cpp",
    "src/unicode-data.cpp",
    "ggml/src/ggml.c",
    "ggml/src/ggml-alloc.c",
    "ggml/src/ggml-backend.c",
    "ggml/src/ggml-quants.c",
    "ggml/src/ggml-aarch64.c",
]
|
|
|
|
// Resources bundled with the target; starts empty and is filled in by the
// platform-specific sections of this manifest.
var resources: [Resource] = []

// Linker settings for the target; starts empty and is filled in by the
// platform-specific sections of this manifest.
var linkerSettings: [LinkerSetting] = []

// Settings handed to the target's `cSettings:` argument.
var cSettings: [CSetting] = [
    .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
    .unsafeFlags(["-fno-objc-arc"]),
    // NOTE: NEW_LAPACK requires iOS 16.4+.
    // We should consider adding this in the future when we drop support for iOS 14
    // (ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
    // .define("ACCELERATE_NEW_LAPACK"),
    // .define("ACCELERATE_LAPACK_ILP64")
]
|
|
|
|
// When the Darwin module is importable: add the Metal source and shader
// resource, link the Accelerate framework, and define the matching GGML
// feature macros.
#if canImport(Darwin)
sources.append("ggml/src/ggml-metal.m")
resources.append(.process("ggml/src/ggml-metal.metal"))
linkerSettings.append(.linkedFramework("Accelerate"))
cSettings.append(
    contentsOf: [
        .define("GGML_USE_ACCELERATE"),
        .define("GGML_USE_METAL")
    ]
)
#endif
|
|
|
|
// On Linux, define `_GNU_SOURCE` for the target's C sources.
#if os(Linux)
cSettings.append(.define("_GNU_SOURCE"))
#endif
|
|
|
|
// Package definition: a single library product `llama` backed by a single
// target rooted at the repository root. The sources, resources, C settings,
// and linker settings come from the variables assembled earlier in this
// manifest.
let package = Package(
    name: "llama",
    // Minimum deployment targets for the Apple platforms.
    platforms: [
        .macOS(.v12),
        .iOS(.v14),
        .watchOS(.v4),
        .tvOS(.v14)
    ],
    products: [
        .library(name: "llama", targets: ["llama"]),
    ],
    targets: [
        .target(
            name: "llama",
            path: ".",
            // Paths under the target root that are excluded from the target.
            exclude: [
                "cmake",
                "examples",
                "scripts",
                "models",
                "tests",
                "CMakeLists.txt",
                "Makefile"
            ],
            sources: sources,
            resources: resources,
            publicHeadersPath: "spm-headers",
            cSettings: cSettings,
            linkerSettings: linkerSettings
        )
    ],
    cxxLanguageStandard: .cxx11
)
|