mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-11 19:21:46 +00:00
parent
0cd6bd3483
commit
554c247caf
36
.github/workflows/build.yml
vendored
36
.github/workflows/build.yml
vendored
@ -688,8 +688,6 @@ jobs:
|
|||||||
|
|
||||||
env:
|
env:
|
||||||
OPENBLAS_VERSION: 0.3.23
|
OPENBLAS_VERSION: 0.3.23
|
||||||
OPENCL_VERSION: 2023.04.17
|
|
||||||
CLBLAST_VERSION: 1.6.0
|
|
||||||
SDE_VERSION: 9.33.0-2024-01-07
|
SDE_VERSION: 9.33.0-2024-01-07
|
||||||
VULKAN_VERSION: 1.3.261.1
|
VULKAN_VERSION: 1.3.261.1
|
||||||
|
|
||||||
@ -706,8 +704,6 @@ jobs:
|
|||||||
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
|
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
|
||||||
- build: 'avx512-x64'
|
- build: 'avx512-x64'
|
||||||
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
|
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
|
||||||
- build: 'clblast-x64'
|
|
||||||
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CLBLAST=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"'
|
|
||||||
- build: 'openblas-x64'
|
- build: 'openblas-x64'
|
||||||
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
|
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
|
||||||
- build: 'kompute-x64'
|
- build: 'kompute-x64'
|
||||||
@ -732,27 +728,6 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
git submodule update --init kompute
|
git submodule update --init kompute
|
||||||
|
|
||||||
- name: Download OpenCL SDK
|
|
||||||
id: get_opencl
|
|
||||||
if: ${{ matrix.build == 'clblast-x64' }}
|
|
||||||
run: |
|
|
||||||
curl.exe -o $env:RUNNER_TEMP/opencl.zip -L "https://github.com/KhronosGroup/OpenCL-SDK/releases/download/v${env:OPENCL_VERSION}/OpenCL-SDK-v${env:OPENCL_VERSION}-Win-x64.zip"
|
|
||||||
mkdir $env:RUNNER_TEMP/opencl
|
|
||||||
tar.exe -xvf $env:RUNNER_TEMP/opencl.zip --strip-components=1 -C $env:RUNNER_TEMP/opencl
|
|
||||||
|
|
||||||
- name: Download CLBlast
|
|
||||||
id: get_clblast
|
|
||||||
if: ${{ matrix.build == 'clblast-x64' }}
|
|
||||||
run: |
|
|
||||||
curl.exe -o $env:RUNNER_TEMP/clblast.7z -L "https://github.com/CNugteren/CLBlast/releases/download/${env:CLBLAST_VERSION}/CLBlast-${env:CLBLAST_VERSION}-windows-x64.7z"
|
|
||||||
curl.exe -o $env:RUNNER_TEMP/CLBlast.LICENSE.txt -L "https://github.com/CNugteren/CLBlast/raw/${env:CLBLAST_VERSION}/LICENSE"
|
|
||||||
7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/clblast.7z
|
|
||||||
rename-item $env:RUNNER_TEMP/CLBlast-${env:CLBLAST_VERSION}-windows-x64 clblast
|
|
||||||
foreach ($f in (gci -Recurse -Path "$env:RUNNER_TEMP/clblast" -Filter '*.cmake')) {
|
|
||||||
$txt = Get-Content -Path $f -Raw
|
|
||||||
$txt.Replace('C:/vcpkg/packages/opencl_x64-windows/', "$($env:RUNNER_TEMP.Replace('\','/'))/opencl/") | Set-Content -Path $f -Encoding UTF8
|
|
||||||
}
|
|
||||||
|
|
||||||
- name: Download OpenBLAS
|
- name: Download OpenBLAS
|
||||||
id: get_openblas
|
id: get_openblas
|
||||||
if: ${{ matrix.build == 'openblas-x64' }}
|
if: ${{ matrix.build == 'openblas-x64' }}
|
||||||
@ -786,13 +761,6 @@ jobs:
|
|||||||
cmake -S . -B build ${{ matrix.defines }}
|
cmake -S . -B build ${{ matrix.defines }}
|
||||||
cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}
|
cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}
|
||||||
|
|
||||||
- name: Add clblast.dll
|
|
||||||
id: add_clblast_dll
|
|
||||||
if: ${{ matrix.build == 'clblast-x64' }}
|
|
||||||
run: |
|
|
||||||
cp $env:RUNNER_TEMP/clblast/lib/clblast.dll ./build/bin/Release
|
|
||||||
cp $env:RUNNER_TEMP/CLBlast.LICENSE.txt ./build/bin/Release/CLBlast-${env:CLBLAST_VERSION}.txt
|
|
||||||
|
|
||||||
- name: Add libopenblas.dll
|
- name: Add libopenblas.dll
|
||||||
id: add_libopenblas_dll
|
id: add_libopenblas_dll
|
||||||
if: ${{ matrix.build == 'openblas-x64' }}
|
if: ${{ matrix.build == 'openblas-x64' }}
|
||||||
@ -816,7 +784,7 @@ jobs:
|
|||||||
- name: Test
|
- name: Test
|
||||||
id: cmake_test
|
id: cmake_test
|
||||||
# not all machines have native AVX-512
|
# not all machines have native AVX-512
|
||||||
if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'clblast-x64' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
|
if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
|
||||||
run: |
|
run: |
|
||||||
cd build
|
cd build
|
||||||
ctest -L main -C Release --verbose --timeout 900
|
ctest -L main -C Release --verbose --timeout 900
|
||||||
@ -1071,7 +1039,7 @@ jobs:
|
|||||||
# hypervisor: 'qemu'
|
# hypervisor: 'qemu'
|
||||||
# run: |
|
# run: |
|
||||||
# sudo pkg update
|
# sudo pkg update
|
||||||
# sudo pkg install -y gmake automake autoconf pkgconf llvm15 clinfo clover opencl clblast openblas
|
# sudo pkg install -y gmake automake autoconf pkgconf llvm15 openblas
|
||||||
# gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15 -j `sysctl -n hw.ncpu`
|
# gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15 -j `sysctl -n hw.ncpu`
|
||||||
|
|
||||||
release:
|
release:
|
||||||
|
@ -111,7 +111,6 @@ option(LLAMA_CUDA_FA_ALL_QUANTS "llama: compile all quants for Flas
|
|||||||
option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
|
option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
|
||||||
option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF)
|
option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF)
|
||||||
option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF)
|
option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF)
|
||||||
option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
|
|
||||||
option(LLAMA_VULKAN "llama: use Vulkan" OFF)
|
option(LLAMA_VULKAN "llama: use Vulkan" OFF)
|
||||||
option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF)
|
option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF)
|
||||||
option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF)
|
option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF)
|
||||||
@ -502,22 +501,6 @@ if (LLAMA_RPC)
|
|||||||
set(GGML_SOURCES_RPC ggml-rpc.cpp)
|
set(GGML_SOURCES_RPC ggml-rpc.cpp)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (LLAMA_CLBLAST)
|
|
||||||
find_package(CLBlast)
|
|
||||||
if (CLBlast_FOUND)
|
|
||||||
message(STATUS "CLBlast found")
|
|
||||||
|
|
||||||
set(GGML_HEADERS_OPENCL ggml-opencl.h)
|
|
||||||
set(GGML_SOURCES_OPENCL ggml-opencl.cpp)
|
|
||||||
|
|
||||||
add_compile_definitions(GGML_USE_CLBLAST)
|
|
||||||
|
|
||||||
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} clblast)
|
|
||||||
else()
|
|
||||||
message(WARNING "CLBlast not found")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (LLAMA_VULKAN)
|
if (LLAMA_VULKAN)
|
||||||
find_package(Vulkan)
|
find_package(Vulkan)
|
||||||
if (Vulkan_FOUND)
|
if (Vulkan_FOUND)
|
||||||
@ -1265,7 +1248,6 @@ add_library(ggml OBJECT
|
|||||||
ggml-quants.c
|
ggml-quants.c
|
||||||
ggml-quants.h
|
ggml-quants.h
|
||||||
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
|
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
|
||||||
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
|
|
||||||
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
|
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
|
||||||
${GGML_SOURCES_RPC} ${GGML_HEADERS_RPC}
|
${GGML_SOURCES_RPC} ${GGML_HEADERS_RPC}
|
||||||
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
|
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
|
||||||
@ -1353,8 +1335,9 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake
|
|||||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Llama)
|
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Llama)
|
||||||
|
|
||||||
set(GGML_PUBLIC_HEADERS "ggml.h" "ggml-alloc.h" "ggml-backend.h"
|
set(GGML_PUBLIC_HEADERS "ggml.h" "ggml-alloc.h" "ggml-backend.h"
|
||||||
"${GGML_HEADERS_CUDA}" "${GGML_HEADERS_OPENCL}"
|
"${GGML_HEADERS_CUDA}"
|
||||||
"${GGML_HEADERS_METAL}" "${GGML_HEADERS_EXTRA}")
|
"${GGML_HEADERS_METAL}"
|
||||||
|
"${GGML_HEADERS_EXTRA}")
|
||||||
|
|
||||||
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
|
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
|
||||||
install(TARGETS ggml PUBLIC_HEADER)
|
install(TARGETS ggml PUBLIC_HEADER)
|
||||||
|
17
Makefile
17
Makefile
@ -547,23 +547,6 @@ ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h
|
|||||||
$(NVCC_COMPILE)
|
$(NVCC_COMPILE)
|
||||||
endif # LLAMA_CUDA
|
endif # LLAMA_CUDA
|
||||||
|
|
||||||
ifdef LLAMA_CLBLAST
|
|
||||||
MK_CPPFLAGS += -DGGML_USE_CLBLAST $(shell pkg-config --cflags-only-I clblast OpenCL)
|
|
||||||
MK_CFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
|
|
||||||
MK_CXXFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
|
|
||||||
|
|
||||||
# Mac provides OpenCL as a framework
|
|
||||||
ifeq ($(UNAME_S),Darwin)
|
|
||||||
MK_LDFLAGS += -lclblast -framework OpenCL
|
|
||||||
else
|
|
||||||
MK_LDFLAGS += $(shell pkg-config --libs clblast OpenCL)
|
|
||||||
endif
|
|
||||||
OBJS += ggml-opencl.o
|
|
||||||
|
|
||||||
ggml-opencl.o: ggml-opencl.cpp ggml-opencl.h
|
|
||||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
|
||||||
endif # LLAMA_CLBLAST
|
|
||||||
|
|
||||||
ifdef LLAMA_VULKAN
|
ifdef LLAMA_VULKAN
|
||||||
MK_CPPFLAGS += -DGGML_USE_VULKAN
|
MK_CPPFLAGS += -DGGML_USE_VULKAN
|
||||||
MK_LDFLAGS += -lvulkan
|
MK_LDFLAGS += -lvulkan
|
||||||
|
@ -29,7 +29,7 @@ The llama.cpp SYCL backend is designed to support **Intel GPU** firstly. Based o
|
|||||||
|
|
||||||
When targeting **Intel CPU**, it is recommended to use the llama.cpp [Intel oneMKL](README.md#intel-onemkl) backend.
|
When targeting **Intel CPU**, it is recommended to use the llama.cpp [Intel oneMKL](README.md#intel-onemkl) backend.
|
||||||
|
|
||||||
It has the similar design of other llama.cpp BLAS-based paths such as *OpenBLAS, cuBLAS, CLBlast etc..*. In beginning work, the oneAPI's [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) open-source migration tool (Commercial release [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)) was used for this purpose.
|
It has a similar design to other llama.cpp BLAS-based paths such as *OpenBLAS, cuBLAS, etc.* In the early stages of this work, oneAPI's [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) open-source migration tool (commercial release: [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)) was used for this purpose.
|
||||||
|
|
||||||
## News
|
## News
|
||||||
|
|
||||||
|
116
README.md
116
README.md
@ -77,7 +77,7 @@ variety of hardware - locally and in the cloud.
|
|||||||
- AVX, AVX2 and AVX512 support for x86 architectures
|
- AVX, AVX2 and AVX512 support for x86 architectures
|
||||||
- 1.5-bit, 2-bit, 3-bit, 4-bit, 5-bit, 6-bit, and 8-bit integer quantization for faster inference and reduced memory use
|
- 1.5-bit, 2-bit, 3-bit, 4-bit, 5-bit, 6-bit, and 8-bit integer quantization for faster inference and reduced memory use
|
||||||
- Custom CUDA kernels for running LLMs on NVIDIA GPUs (support for AMD GPUs via HIP)
|
- Custom CUDA kernels for running LLMs on NVIDIA GPUs (support for AMD GPUs via HIP)
|
||||||
- Vulkan, SYCL, and (partial) OpenCL backend support
|
- Vulkan and SYCL backend support
|
||||||
- CPU+GPU hybrid inference to partially accelerate models larger than the total VRAM capacity
|
- CPU+GPU hybrid inference to partially accelerate models larger than the total VRAM capacity
|
||||||
|
|
||||||
Since its [inception](https://github.com/ggerganov/llama.cpp/issues/33#issuecomment-1465108022), the project has
|
Since its [inception](https://github.com/ggerganov/llama.cpp/issues/33#issuecomment-1465108022), the project has
|
||||||
@ -371,16 +371,11 @@ In order to build llama.cpp you have four different options.
|
|||||||
3. Install compilation dependencies.
|
3. Install compilation dependencies.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
sudo pkg install gmake automake autoconf pkgconf llvm15 clinfo clover \
|
sudo pkg install gmake automake autoconf pkgconf llvm15 openblas
|
||||||
opencl clblast openblas
|
|
||||||
|
|
||||||
gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15 -j4
|
gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15 -j4
|
||||||
```
|
```
|
||||||
|
|
||||||
**Notes:** With this packages you can build llama.cpp with OPENBLAS and
|
|
||||||
CLBLAST support for use OpenCL GPU acceleration in FreeBSD. Please read
|
|
||||||
the instructions for use and activate this options in this document below.
|
|
||||||
|
|
||||||
### Homebrew
|
### Homebrew
|
||||||
|
|
||||||
On Mac and Linux, the homebrew package manager can be used via
|
On Mac and Linux, the homebrew package manager can be used via
|
||||||
@ -399,7 +394,7 @@ argument.
|
|||||||
|
|
||||||
### BLAS Build
|
### BLAS Build
|
||||||
|
|
||||||
Building the program with BLAS support may lead to some performance improvements in prompt processing using batch sizes higher than 32 (the default is 512). Support with CPU-only BLAS implementations doesn't affect the normal generation performance. We may see generation performance improvements with GPU-involved BLAS implementations, e.g. cuBLAS, hipBLAS and CLBlast. There are currently several different BLAS implementations available for build and use:
|
Building the program with BLAS support may lead to some performance improvements in prompt processing using batch sizes higher than 32 (the default is 512). Support with CPU-only BLAS implementations doesn't affect the normal generation performance. We may see generation performance improvements with GPU-involved BLAS implementations, e.g. cuBLAS and hipBLAS. There are currently several different BLAS implementations available for build and use:
|
||||||
|
|
||||||
- #### Accelerate Framework:
|
- #### Accelerate Framework:
|
||||||
|
|
||||||
@ -553,111 +548,6 @@ Building the program with BLAS support may lead to some performance improvements
|
|||||||
| LLAMA_CUDA_MMV_Y | Positive integer | 1 | Block size in y direction for the HIP mul mat vec kernels. Increasing this value can improve performance on fast GPUs. Power of 2 recommended. Does not affect k-quants. |
|
| LLAMA_CUDA_MMV_Y | Positive integer | 1 | Block size in y direction for the HIP mul mat vec kernels. Increasing this value can improve performance on fast GPUs. Power of 2 recommended. Does not affect k-quants. |
|
||||||
| LLAMA_CUDA_KQUANTS_ITER | 1 or 2 | 2 | Number of values processed per iteration and per HIP thread for Q2_K and Q6_K quantization formats. Setting this value to 1 can improve performance for slow GPUs. |
|
| LLAMA_CUDA_KQUANTS_ITER | 1 or 2 | 2 | Number of values processed per iteration and per HIP thread for Q2_K and Q6_K quantization formats. Setting this value to 1 can improve performance for slow GPUs. |
|
||||||
|
|
||||||
- #### CLBlast
|
|
||||||
|
|
||||||
OpenCL acceleration is provided by the matrix multiplication kernels from the [CLBlast](https://github.com/CNugteren/CLBlast) project and custom kernels for ggml that can generate tokens on the GPU.
|
|
||||||
|
|
||||||
You will need the [OpenCL SDK](https://github.com/KhronosGroup/OpenCL-SDK).
|
|
||||||
- For Ubuntu, Debian, and Fedora the packages `opencl-headers`, `ocl-icd` may be needed.
|
|
||||||
|
|
||||||
- For Windows, a pre-built SDK is available on the [OpenCL Releases](https://github.com/KhronosGroup/OpenCL-SDK/releases) page.
|
|
||||||
|
|
||||||
- <details>
|
|
||||||
<summary>Installing the OpenCL SDK from source</summary>
|
|
||||||
|
|
||||||
```sh
|
|
||||||
git clone --recurse-submodules https://github.com/KhronosGroup/OpenCL-SDK.git
|
|
||||||
cd OpenCL-SDK
|
|
||||||
cmake -B build -DBUILD_DOCS=OFF \
|
|
||||||
-DBUILD_EXAMPLES=OFF \
|
|
||||||
-DBUILD_TESTING=OFF \
|
|
||||||
-DOPENCL_SDK_BUILD_SAMPLES=OFF \
|
|
||||||
-DOPENCL_SDK_TEST_SAMPLES=OFF
|
|
||||||
cmake --build build
|
|
||||||
cmake --install build --prefix /some/path
|
|
||||||
```
|
|
||||||
</details>
|
|
||||||
|
|
||||||
##### Installing CLBlast
|
|
||||||
|
|
||||||
Pre-built CLBlast binaries may be found on the [CLBlast Releases](https://github.com/CNugteren/CLBlast/releases) page. For Unix variants, it may also be found in your operating system's packages.
|
|
||||||
|
|
||||||
Linux packaging:
|
|
||||||
Fedora Linux:
|
|
||||||
```bash
|
|
||||||
sudo dnf install clblast
|
|
||||||
```
|
|
||||||
|
|
||||||
Alternatively, they may be built from source.
|
|
||||||
|
|
||||||
- <details>
|
|
||||||
<summary>Windows:</summary>
|
|
||||||
|
|
||||||
```cmd
|
|
||||||
set OPENCL_SDK_ROOT="C:/OpenCL-SDK-v2023.04.17-Win-x64"
|
|
||||||
git clone https://github.com/CNugteren/CLBlast.git
|
|
||||||
cd CLBlast
|
|
||||||
cmake -B build -DBUILD_SHARED_LIBS=OFF -DOVERRIDE_MSVC_FLAGS_TO_MT=OFF -DTUNERS=OFF -DOPENCL_ROOT=%OPENCL_SDK_ROOT% -G "Visual Studio 17 2022" -A x64
|
|
||||||
cmake --build build --config Release
|
|
||||||
cmake --install build --prefix C:/CLBlast
|
|
||||||
```
|
|
||||||
|
|
||||||
(note: `--config Release` at build time is the default and only relevant for Visual Studio builds - or multi-config Ninja builds)
|
|
||||||
|
|
||||||
- <details>
|
|
||||||
<summary>Unix:</summary>
|
|
||||||
|
|
||||||
```sh
|
|
||||||
git clone https://github.com/CNugteren/CLBlast.git
|
|
||||||
cd CLBlast
|
|
||||||
cmake -B build -DBUILD_SHARED_LIBS=OFF -DTUNERS=OFF
|
|
||||||
cmake --build build --config Release
|
|
||||||
cmake --install build --prefix /some/path
|
|
||||||
```
|
|
||||||
|
|
||||||
Where `/some/path` is where the built library will be installed (default is `/usr/local`).
|
|
||||||
</details>
|
|
||||||
|
|
||||||
##### Building Llama with CLBlast
|
|
||||||
|
|
||||||
- Build with make:
|
|
||||||
```sh
|
|
||||||
make LLAMA_CLBLAST=1
|
|
||||||
```
|
|
||||||
- CMake (Unix):
|
|
||||||
```sh
|
|
||||||
cmake -B build -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
|
|
||||||
cmake --build build --config Release
|
|
||||||
```
|
|
||||||
- CMake (Windows):
|
|
||||||
```cmd
|
|
||||||
set CL_BLAST_CMAKE_PKG="C:/CLBlast/lib/cmake/CLBlast"
|
|
||||||
git clone https://github.com/ggerganov/llama.cpp
|
|
||||||
cd llama.cpp
|
|
||||||
cmake -B build -DBUILD_SHARED_LIBS=OFF -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH=%CL_BLAST_CMAKE_PKG% -G "Visual Studio 17 2022" -A x64
|
|
||||||
cmake --build build --config Release
|
|
||||||
cmake --install build --prefix C:/LlamaCPP
|
|
||||||
```
|
|
||||||
|
|
||||||
##### Running Llama with CLBlast
|
|
||||||
|
|
||||||
The CLBlast build supports `--gpu-layers|-ngl` like the CUDA version does.
|
|
||||||
|
|
||||||
To select the correct platform (driver) and device (GPU), you can use the environment variables `GGML_OPENCL_PLATFORM` and `GGML_OPENCL_DEVICE`.
|
|
||||||
The selection can be a number (starting from 0) or a text string to search:
|
|
||||||
|
|
||||||
```sh
|
|
||||||
GGML_OPENCL_PLATFORM=1 ./main ...
|
|
||||||
GGML_OPENCL_DEVICE=2 ./main ...
|
|
||||||
GGML_OPENCL_PLATFORM=Intel ./main ...
|
|
||||||
GGML_OPENCL_PLATFORM=AMD GGML_OPENCL_DEVICE=1 ./main ...
|
|
||||||
```
|
|
||||||
|
|
||||||
The default behavior is to find the first GPU device, but when it is an integrated GPU on a laptop, for instance, the selectors are useful.
|
|
||||||
Using the variables it is possible to select a CPU-based driver as well, if so desired.
|
|
||||||
|
|
||||||
You can get a list of platforms and devices from the `clinfo -l` command, etc.
|
|
||||||
|
|
||||||
- #### Vulkan
|
- #### Vulkan
|
||||||
|
|
||||||
**With docker**:
|
**With docker**:
|
||||||
|
@ -2844,7 +2844,6 @@ void yaml_dump_non_result_info(FILE * stream, const gpt_params & params, const l
|
|||||||
fprintf(stream, "cpu_has_avx512_vnni: %s\n", ggml_cpu_has_avx512_vnni() ? "true" : "false");
|
fprintf(stream, "cpu_has_avx512_vnni: %s\n", ggml_cpu_has_avx512_vnni() ? "true" : "false");
|
||||||
fprintf(stream, "cpu_has_cuda: %s\n", ggml_cpu_has_cuda() ? "true" : "false");
|
fprintf(stream, "cpu_has_cuda: %s\n", ggml_cpu_has_cuda() ? "true" : "false");
|
||||||
fprintf(stream, "cpu_has_vulkan: %s\n", ggml_cpu_has_vulkan() ? "true" : "false");
|
fprintf(stream, "cpu_has_vulkan: %s\n", ggml_cpu_has_vulkan() ? "true" : "false");
|
||||||
fprintf(stream, "cpu_has_clblast: %s\n", ggml_cpu_has_clblast() ? "true" : "false");
|
|
||||||
fprintf(stream, "cpu_has_kompute: %s\n", ggml_cpu_has_kompute() ? "true" : "false");
|
fprintf(stream, "cpu_has_kompute: %s\n", ggml_cpu_has_kompute() ? "true" : "false");
|
||||||
fprintf(stream, "cpu_has_fma: %s\n", ggml_cpu_has_fma() ? "true" : "false");
|
fprintf(stream, "cpu_has_fma: %s\n", ggml_cpu_has_fma() ? "true" : "false");
|
||||||
fprintf(stream, "cpu_has_gpublas: %s\n", ggml_cpu_has_gpublas() ? "true" : "false");
|
fprintf(stream, "cpu_has_gpublas: %s\n", ggml_cpu_has_gpublas() ? "true" : "false");
|
||||||
|
@ -162,7 +162,7 @@ $ ./llama-bench -o csv
|
|||||||
```
|
```
|
||||||
|
|
||||||
```csv
|
```csv
|
||||||
build_commit,build_number,cuda,opencl,metal,gpu_blas,blas,cpu_info,gpu_info,model_filename,model_type,model_size,model_n_params,n_batch,n_threads,f16_kv,n_gpu_layers,main_gpu,mul_mat_q,tensor_split,n_prompt,n_gen,test_time,avg_ns,stddev_ns,avg_ts,stddev_ts
|
build_commit,build_number,cuda,metal,gpu_blas,blas,cpu_info,gpu_info,model_filename,model_type,model_size,model_n_params,n_batch,n_threads,f16_kv,n_gpu_layers,main_gpu,mul_mat_q,tensor_split,n_prompt,n_gen,test_time,avg_ns,stddev_ns,avg_ts,stddev_ts
|
||||||
"3469684","1275","1","0","0","1","1","13th Gen Intel(R) Core(TM) i9-13900K","NVIDIA GeForce RTX 3090 Ti","models/7B/ggml-model-q4_0.gguf","llama 7B mostly Q4_0","3825065984","6738415616","512","16","1","99","0","1","0.00","512","0","2023-09-23T12:09:01Z","212155977","732372","2413.341687","8.305961"
|
"3469684","1275","1","0","0","1","1","13th Gen Intel(R) Core(TM) i9-13900K","NVIDIA GeForce RTX 3090 Ti","models/7B/ggml-model-q4_0.gguf","llama 7B mostly Q4_0","3825065984","6738415616","512","16","1","99","0","1","0.00","512","0","2023-09-23T12:09:01Z","212155977","732372","2413.341687","8.305961"
|
||||||
"3469684","1275","1","0","0","1","1","13th Gen Intel(R) Core(TM) i9-13900K","NVIDIA GeForce RTX 3090 Ti","models/7B/ggml-model-q4_0.gguf","llama 7B mostly Q4_0","3825065984","6738415616","512","16","1","99","0","1","0.00","0","128","2023-09-23T12:09:02Z","969320879","2728399","132.052051","0.371342"
|
"3469684","1275","1","0","0","1","1","13th Gen Intel(R) Core(TM) i9-13900K","NVIDIA GeForce RTX 3090 Ti","models/7B/ggml-model-q4_0.gguf","llama 7B mostly Q4_0","3825065984","6738415616","512","16","1","99","0","1","0.00","0","128","2023-09-23T12:09:02Z","969320879","2728399","132.052051","0.371342"
|
||||||
```
|
```
|
||||||
@ -179,7 +179,6 @@ $ ./llama-bench -o json
|
|||||||
"build_commit": "3469684",
|
"build_commit": "3469684",
|
||||||
"build_number": 1275,
|
"build_number": 1275,
|
||||||
"cuda": true,
|
"cuda": true,
|
||||||
"opencl": false,
|
|
||||||
"metal": false,
|
"metal": false,
|
||||||
"gpu_blas": true,
|
"gpu_blas": true,
|
||||||
"blas": true,
|
"blas": true,
|
||||||
@ -210,7 +209,6 @@ $ ./llama-bench -o json
|
|||||||
"build_commit": "3469684",
|
"build_commit": "3469684",
|
||||||
"build_number": 1275,
|
"build_number": 1275,
|
||||||
"cuda": true,
|
"cuda": true,
|
||||||
"opencl": false,
|
|
||||||
"metal": false,
|
"metal": false,
|
||||||
"gpu_blas": true,
|
"gpu_blas": true,
|
||||||
"blas": true,
|
"blas": true,
|
||||||
@ -253,7 +251,6 @@ CREATE TABLE IF NOT EXISTS test (
|
|||||||
build_commit TEXT,
|
build_commit TEXT,
|
||||||
build_number INTEGER,
|
build_number INTEGER,
|
||||||
cuda INTEGER,
|
cuda INTEGER,
|
||||||
opencl INTEGER,
|
|
||||||
metal INTEGER,
|
metal INTEGER,
|
||||||
gpu_blas INTEGER,
|
gpu_blas INTEGER,
|
||||||
blas INTEGER,
|
blas INTEGER,
|
||||||
@ -279,6 +276,6 @@ CREATE TABLE IF NOT EXISTS test (
|
|||||||
stddev_ts REAL
|
stddev_ts REAL
|
||||||
);
|
);
|
||||||
|
|
||||||
INSERT INTO test (build_commit, build_number, cuda, opencl, metal, gpu_blas, blas, cpu_info, gpu_info, model_filename, model_type, model_size, model_n_params, n_batch, n_threads, f16_kv, n_gpu_layers, main_gpu, mul_mat_q, tensor_split, n_prompt, n_gen, test_time, avg_ns, stddev_ns, avg_ts, stddev_ts) VALUES ('3469684', '1275', '1', '0', '0', '1', '1', '13th Gen Intel(R) Core(TM) i9-13900K', 'NVIDIA GeForce RTX 3090 Ti', 'models/7B/ggml-model-q4_0.gguf', 'llama 7B mostly Q4_0', '3825065984', '6738415616', '512', '16', '1', '99', '0', '1', '0.00', '512', '0', '2023-09-23T12:10:30Z', '212693772', '743623', '2407.240204', '8.409634');
|
INSERT INTO test (build_commit, build_number, cuda, metal, gpu_blas, blas, cpu_info, gpu_info, model_filename, model_type, model_size, model_n_params, n_batch, n_threads, f16_kv, n_gpu_layers, main_gpu, mul_mat_q, tensor_split, n_prompt, n_gen, test_time, avg_ns, stddev_ns, avg_ts, stddev_ts) VALUES ('3469684', '1275', '1', '0', '0', '1', '1', '13th Gen Intel(R) Core(TM) i9-13900K', 'NVIDIA GeForce RTX 3090 Ti', 'models/7B/ggml-model-q4_0.gguf', 'llama 7B mostly Q4_0', '3825065984', '6738415616', '512', '16', '1', '99', '0', '1', '0.00', '512', '0', '2023-09-23T12:10:30Z', '212693772', '743623', '2407.240204', '8.409634');
|
||||||
INSERT INTO test (build_commit, build_number, cuda, opencl, metal, gpu_blas, blas, cpu_info, gpu_info, model_filename, model_type, model_size, model_n_params, n_batch, n_threads, f16_kv, n_gpu_layers, main_gpu, mul_mat_q, tensor_split, n_prompt, n_gen, test_time, avg_ns, stddev_ns, avg_ts, stddev_ts) VALUES ('3469684', '1275', '1', '0', '0', '1', '1', '13th Gen Intel(R) Core(TM) i9-13900K', 'NVIDIA GeForce RTX 3090 Ti', 'models/7B/ggml-model-q4_0.gguf', 'llama 7B mostly Q4_0', '3825065984', '6738415616', '512', '16', '1', '99', '0', '1', '0.00', '0', '128', '2023-09-23T12:10:31Z', '977925003', '4037361', '130.891159', '0.537692');
|
INSERT INTO test (build_commit, build_number, cuda, metal, gpu_blas, blas, cpu_info, gpu_info, model_filename, model_type, model_size, model_n_params, n_batch, n_threads, f16_kv, n_gpu_layers, main_gpu, mul_mat_q, tensor_split, n_prompt, n_gen, test_time, avg_ns, stddev_ns, avg_ts, stddev_ts) VALUES ('3469684', '1275', '1', '0', '0', '1', '1', '13th Gen Intel(R) Core(TM) i9-13900K', 'NVIDIA GeForce RTX 3090 Ti', 'models/7B/ggml-model-q4_0.gguf', 'llama 7B mostly Q4_0', '3825065984', '6738415616', '512', '16', '1', '99', '0', '1', '0.00', '0', '128', '2023-09-23T12:10:31Z', '977925003', '4037361', '130.891159', '0.537692');
|
||||||
```
|
```
|
||||||
|
@ -723,7 +723,6 @@ struct test {
|
|||||||
static const std::string build_commit;
|
static const std::string build_commit;
|
||||||
static const int build_number;
|
static const int build_number;
|
||||||
static const bool cuda;
|
static const bool cuda;
|
||||||
static const bool opencl;
|
|
||||||
static const bool vulkan;
|
static const bool vulkan;
|
||||||
static const bool kompute;
|
static const bool kompute;
|
||||||
static const bool metal;
|
static const bool metal;
|
||||||
@ -812,9 +811,6 @@ struct test {
|
|||||||
if (cuda) {
|
if (cuda) {
|
||||||
return GGML_CUDA_NAME;
|
return GGML_CUDA_NAME;
|
||||||
}
|
}
|
||||||
if (opencl) {
|
|
||||||
return "OpenCL";
|
|
||||||
}
|
|
||||||
if (vulkan) {
|
if (vulkan) {
|
||||||
return "Vulkan";
|
return "Vulkan";
|
||||||
}
|
}
|
||||||
@ -843,7 +839,7 @@ struct test {
|
|||||||
static const std::vector<std::string> & get_fields() {
|
static const std::vector<std::string> & get_fields() {
|
||||||
static const std::vector<std::string> fields = {
|
static const std::vector<std::string> fields = {
|
||||||
"build_commit", "build_number",
|
"build_commit", "build_number",
|
||||||
"cuda", "opencl", "vulkan", "kompute", "metal", "sycl", "rpc", "gpu_blas", "blas",
|
"cuda", "vulkan", "kompute", "metal", "sycl", "rpc", "gpu_blas", "blas",
|
||||||
"cpu_info", "gpu_info",
|
"cpu_info", "gpu_info",
|
||||||
"model_filename", "model_type", "model_size", "model_n_params",
|
"model_filename", "model_type", "model_size", "model_n_params",
|
||||||
"n_batch", "n_ubatch",
|
"n_batch", "n_ubatch",
|
||||||
@ -869,7 +865,7 @@ struct test {
|
|||||||
field == "avg_ns" || field == "stddev_ns") {
|
field == "avg_ns" || field == "stddev_ns") {
|
||||||
return INT;
|
return INT;
|
||||||
}
|
}
|
||||||
if (field == "cuda" || field == "opencl" || field == "vulkan" || field == "kompute" || field == "metal" ||
|
if (field == "cuda" || field == "vulkan" || field == "kompute" || field == "metal" ||
|
||||||
field == "gpu_blas" || field == "blas" || field == "sycl" ||field == "f16_kv" || field == "no_kv_offload" ||
|
field == "gpu_blas" || field == "blas" || field == "sycl" ||field == "f16_kv" || field == "no_kv_offload" ||
|
||||||
field == "flash_attn" || field == "use_mmap" || field == "embeddings") {
|
field == "flash_attn" || field == "use_mmap" || field == "embeddings") {
|
||||||
return BOOL;
|
return BOOL;
|
||||||
@ -898,7 +894,7 @@ struct test {
|
|||||||
}
|
}
|
||||||
std::vector<std::string> values = {
|
std::vector<std::string> values = {
|
||||||
build_commit, std::to_string(build_number),
|
build_commit, std::to_string(build_number),
|
||||||
std::to_string(cuda), std::to_string(opencl), std::to_string(vulkan), std::to_string(vulkan),
|
// Backend flags: order must match get_fields() ("cuda", "vulkan", "kompute", ...).
// The third slot is the "kompute" column, so it must report `kompute`, not `vulkan` again.
std::to_string(cuda), std::to_string(vulkan), std::to_string(kompute),
|
||||||
std::to_string(metal), std::to_string(sycl), std::to_string(rpc), std::to_string(gpu_blas), std::to_string(blas),
|
std::to_string(metal), std::to_string(sycl), std::to_string(rpc), std::to_string(gpu_blas), std::to_string(blas),
|
||||||
cpu_info, gpu_info,
|
cpu_info, gpu_info,
|
||||||
model_filename, model_type, std::to_string(model_size), std::to_string(model_n_params),
|
model_filename, model_type, std::to_string(model_size), std::to_string(model_n_params),
|
||||||
@ -927,7 +923,6 @@ struct test {
|
|||||||
const std::string test::build_commit = LLAMA_COMMIT;
|
const std::string test::build_commit = LLAMA_COMMIT;
|
||||||
const int test::build_number = LLAMA_BUILD_NUMBER;
|
const int test::build_number = LLAMA_BUILD_NUMBER;
|
||||||
const bool test::cuda = !!ggml_cpu_has_cuda();
|
const bool test::cuda = !!ggml_cpu_has_cuda();
|
||||||
const bool test::opencl = !!ggml_cpu_has_clblast();
|
|
||||||
const bool test::vulkan = !!ggml_cpu_has_vulkan();
|
const bool test::vulkan = !!ggml_cpu_has_vulkan();
|
||||||
const bool test::kompute = !!ggml_cpu_has_kompute();
|
const bool test::kompute = !!ggml_cpu_has_kompute();
|
||||||
const bool test::metal = !!ggml_cpu_has_metal();
|
const bool test::metal = !!ggml_cpu_has_metal();
|
||||||
|
@ -8,16 +8,14 @@ Because this example is "outside of the source tree", it is important to first b
|
|||||||
|
|
||||||
### Considerations
|
### Considerations
|
||||||
|
|
||||||
When hardware acceleration libraries are used (e.g. CUDA, Metal, CLBlast, etc.), CMake must be able to locate the associated CMake package. In the example below, when building _main-cmake-pkg_ notice the `CMAKE_PREFIX_PATH` includes the Llama CMake package location _in addition to_ the CLBlast package—which was used when compiling _llama.cpp_.
|
When hardware acceleration libraries are used (e.g. CUDA, Metal, etc.), CMake must be able to locate the associated CMake package.
|
||||||
|
|
||||||
### Build llama.cpp and install to C:\LlamaCPP directory
|
### Build llama.cpp and install to C:\LlamaCPP directory
|
||||||
|
|
||||||
In this case, CLBlast was already installed so the CMake package is referenced in `CMAKE_PREFIX_PATH`.
|
|
||||||
|
|
||||||
```cmd
|
```cmd
|
||||||
git clone https://github.com/ggerganov/llama.cpp
|
git clone https://github.com/ggerganov/llama.cpp
|
||||||
cd llama.cpp
|
cd llama.cpp
|
||||||
cmake -B build -DBUILD_SHARED_LIBS=OFF -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH=C:/CLBlast/lib/cmake/CLBlast -G "Visual Studio 17 2022" -A x64
|
cmake -B build -DBUILD_SHARED_LIBS=OFF -G "Visual Studio 17 2022" -A x64
|
||||||
cmake --build build --config Release
|
cmake --build build --config Release
|
||||||
cmake --install build --prefix C:/LlamaCPP
|
cmake --install build --prefix C:/LlamaCPP
|
||||||
```
|
```
|
||||||
@ -27,7 +25,7 @@ cmake --install build --prefix C:/LlamaCPP
|
|||||||
|
|
||||||
```cmd
|
```cmd
|
||||||
cd ..\examples\main-cmake-pkg
|
cd ..\examples\main-cmake-pkg
|
||||||
cmake -B build -DBUILD_SHARED_LIBS=OFF -DCMAKE_PREFIX_PATH="C:/CLBlast/lib/cmake/CLBlast;C:/LlamaCPP/lib/cmake/Llama" -G "Visual Studio 17 2022" -A x64
|
cmake -B build -DBUILD_SHARED_LIBS=OFF -DCMAKE_PREFIX_PATH="C:/LlamaCPP/lib/cmake/Llama" -G "Visual Studio 17 2022" -A x64
|
||||||
cmake --build build --config Release
|
cmake --build build --config Release
|
||||||
cmake --install build --prefix C:/MyLlamaApp
|
cmake --install build --prefix C:/MyLlamaApp
|
||||||
```
|
```
|
||||||
|
@ -159,7 +159,6 @@
|
|||||||
windows = config.legacyPackages.llamaPackagesWindows.llama-cpp;
|
windows = config.legacyPackages.llamaPackagesWindows.llama-cpp;
|
||||||
}
|
}
|
||||||
// lib.optionalAttrs pkgs.stdenv.isLinux {
|
// lib.optionalAttrs pkgs.stdenv.isLinux {
|
||||||
opencl = config.packages.default.override { useOpenCL = true; };
|
|
||||||
cuda = config.legacyPackages.llamaPackagesCuda.llama-cpp;
|
cuda = config.legacyPackages.llamaPackagesCuda.llama-cpp;
|
||||||
|
|
||||||
mpi-cpu = config.packages.default.override { useMpi = true; };
|
mpi-cpu = config.packages.default.override { useMpi = true; };
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
// An interface allowing to compute ggml_cgraph with Metal
|
// An interface allowing to compute ggml_cgraph with Metal
|
||||||
//
|
//
|
||||||
// This is a fully functional interface that extends ggml with GPU support for Apple devices.
|
// This is a fully functional interface that extends ggml with GPU support for Apple devices.
|
||||||
// A similar interface can be created for other GPU backends (e.g. Vulkan, CUDA, OpenCL, etc.)
|
// A similar interface can be created for other GPU backends (e.g. Vulkan, CUDA, etc.)
|
||||||
//
|
//
|
||||||
// How it works?
|
// How it works?
|
||||||
//
|
//
|
||||||
|
2305
ggml-opencl.cpp
2305
ggml-opencl.cpp
File diff suppressed because it is too large
Load Diff
@ -1,36 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include "ggml.h"
|
|
||||||
#include "ggml-backend.h"
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
GGML_API void ggml_cl_init(void);
|
|
||||||
|
|
||||||
GGML_API void ggml_cl_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
|
|
||||||
GGML_API void ggml_cl_add(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
|
|
||||||
GGML_API bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, const struct ggml_tensor * dst);
|
|
||||||
GGML_API size_t ggml_cl_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
|
|
||||||
GGML_API void ggml_cl_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
|
|
||||||
|
|
||||||
// GGML_API void * ggml_cl_host_malloc(size_t size);
|
|
||||||
// GGML_API void ggml_cl_host_free(void * ptr);
|
|
||||||
|
|
||||||
GGML_API void ggml_cl_free_data(const struct ggml_tensor* tensor);
|
|
||||||
|
|
||||||
GGML_API void ggml_cl_transform_tensor(void * data, struct ggml_tensor * tensor);
|
|
||||||
|
|
||||||
// backend API
|
|
||||||
|
|
||||||
// GGML_API ggml_backend_t ggml_backend_opencl_init(void);
|
|
||||||
|
|
||||||
// GGML_API bool ggml_backend_is_opencl(ggml_backend_t backend);
|
|
||||||
|
|
||||||
GGML_API ggml_backend_buffer_type_t ggml_backend_opencl_buffer_type(void);
|
|
||||||
// GGML_API ggml_backend_buffer_type_t ggml_backend_opencl_host_buffer_type(void);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
62
ggml.c
62
ggml.c
@ -297,17 +297,12 @@ inline static void * ggml_calloc(size_t num, size_t size) {
|
|||||||
|
|
||||||
#if defined(GGML_USE_ACCELERATE)
|
#if defined(GGML_USE_ACCELERATE)
|
||||||
#include <Accelerate/Accelerate.h>
|
#include <Accelerate/Accelerate.h>
|
||||||
#if defined(GGML_USE_CLBLAST) // allow usage of CLBlast alongside Accelerate functions
|
|
||||||
#include "ggml-opencl.h"
|
|
||||||
#endif
|
|
||||||
#elif defined(GGML_USE_OPENBLAS)
|
#elif defined(GGML_USE_OPENBLAS)
|
||||||
#if defined(GGML_BLAS_USE_MKL)
|
#if defined(GGML_BLAS_USE_MKL)
|
||||||
#include <mkl.h>
|
#include <mkl.h>
|
||||||
#else
|
#else
|
||||||
#include <cblas.h>
|
#include <cblas.h>
|
||||||
#endif
|
#endif
|
||||||
#elif defined(GGML_USE_CLBLAST)
|
|
||||||
#include "ggml-opencl.h"
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// floating point type used to accumulate sums
|
// floating point type used to accumulate sums
|
||||||
@ -3380,10 +3375,6 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
|
|||||||
GGML_PRINT_DEBUG("%s: g_state initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f);
|
GGML_PRINT_DEBUG("%s: g_state initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(GGML_USE_CLBLAST)
|
|
||||||
ggml_cl_init();
|
|
||||||
#endif
|
|
||||||
|
|
||||||
ggml_setup_op_has_task_pass();
|
ggml_setup_op_has_task_pass();
|
||||||
|
|
||||||
is_first_call = false;
|
is_first_call = false;
|
||||||
@ -9053,17 +9044,6 @@ static void ggml_compute_forward_add_f32(
|
|||||||
const int ith = params->ith;
|
const int ith = params->ith;
|
||||||
const int nth = params->nth;
|
const int nth = params->nth;
|
||||||
|
|
||||||
#ifdef GGML_USE_CLBLAST
|
|
||||||
if (src1->backend == GGML_BACKEND_TYPE_GPU) {
|
|
||||||
// TODO: OpenCL kernel support full broadcast
|
|
||||||
GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
|
|
||||||
if (ith == 0) {
|
|
||||||
ggml_cl_add(src0, src1, dst);
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
const int nr = ggml_nrows(src0);
|
const int nr = ggml_nrows(src0);
|
||||||
|
|
||||||
GGML_TENSOR_BINARY_OP_LOCALS
|
GGML_TENSOR_BINARY_OP_LOCALS
|
||||||
@ -10171,17 +10151,6 @@ static void ggml_compute_forward_mul_f32(
|
|||||||
const int ith = params->ith;
|
const int ith = params->ith;
|
||||||
const int nth = params->nth;
|
const int nth = params->nth;
|
||||||
|
|
||||||
#if defined(GGML_USE_CLBLAST)
|
|
||||||
if (src1->backend == GGML_BACKEND_TYPE_GPU) {
|
|
||||||
// TODO: OpenCL kernel support full broadcast
|
|
||||||
GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
|
|
||||||
if (ith == 0) {
|
|
||||||
ggml_cl_mul(src0, src1, dst);
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
const int64_t nr = ggml_nrows(src0);
|
const int64_t nr = ggml_nrows(src0);
|
||||||
|
|
||||||
GGML_TENSOR_BINARY_OP_LOCALS
|
GGML_TENSOR_BINARY_OP_LOCALS
|
||||||
@ -12417,15 +12386,6 @@ static void ggml_compute_forward_mul_mat(
|
|||||||
// nb01 >= nb00 - src0 is not transposed
|
// nb01 >= nb00 - src0 is not transposed
|
||||||
// compute by src0 rows
|
// compute by src0 rows
|
||||||
|
|
||||||
#if defined(GGML_USE_CLBLAST)
|
|
||||||
if (ggml_cl_can_mul_mat(src0, src1, dst)) {
|
|
||||||
if (params->ith == 0 && params->type == GGML_TASK_TYPE_COMPUTE) {
|
|
||||||
ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
||||||
if (ggml_compute_forward_mul_mat_use_blas(dst)) {
|
if (ggml_compute_forward_mul_mat_use_blas(dst)) {
|
||||||
const int64_t ne_plane = ne01*ne00;
|
const int64_t ne_plane = ne01*ne00;
|
||||||
@ -12873,8 +12833,6 @@ static void ggml_compute_forward_out_prod_f32(
|
|||||||
// nb01 >= nb00 - src0 is not transposed
|
// nb01 >= nb00 - src0 is not transposed
|
||||||
// compute by src0 rows
|
// compute by src0 rows
|
||||||
|
|
||||||
// TODO: #if defined(GGML_USE_CLBLAST)
|
|
||||||
|
|
||||||
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
||||||
bool use_blas = ggml_is_matrix(src0) &&
|
bool use_blas = ggml_is_matrix(src0) &&
|
||||||
ggml_is_matrix(src1) &&
|
ggml_is_matrix(src1) &&
|
||||||
@ -13072,7 +13030,7 @@ static void ggml_compute_forward_out_prod_q_f32(
|
|||||||
// nb01 >= nb00 - src0 is not transposed
|
// nb01 >= nb00 - src0 is not transposed
|
||||||
// compute by src0 rows
|
// compute by src0 rows
|
||||||
|
|
||||||
// TODO: #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
|
// TODO: #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
||||||
|
|
||||||
if (params->type == GGML_TASK_TYPE_INIT) {
|
if (params->type == GGML_TASK_TYPE_INIT) {
|
||||||
if (ith != 0) {
|
if (ith != 0) {
|
||||||
@ -19546,11 +19504,6 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
|
|||||||
{
|
{
|
||||||
const enum ggml_type vec_dot_type = type_traits[node->src[0]->type].vec_dot_type;
|
const enum ggml_type vec_dot_type = type_traits[node->src[0]->type].vec_dot_type;
|
||||||
|
|
||||||
#if defined(GGML_USE_CLBLAST)
|
|
||||||
if (ggml_cl_can_mul_mat(node->src[0], node->src[1], node)) {
|
|
||||||
cur = ggml_cl_mul_mat_get_wsize(node->src[0], node->src[1], node);
|
|
||||||
} else
|
|
||||||
#endif
|
|
||||||
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
||||||
if (ggml_compute_forward_mul_mat_use_blas(node)) {
|
if (ggml_compute_forward_mul_mat_use_blas(node)) {
|
||||||
if (node->src[0]->type != GGML_TYPE_F32) {
|
if (node->src[0]->type != GGML_TYPE_F32) {
|
||||||
@ -22859,7 +22812,7 @@ int ggml_cpu_has_wasm_simd(void) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int ggml_cpu_has_blas(void) {
|
int ggml_cpu_has_blas(void) {
|
||||||
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUDA) || defined(GGML_USE_VULKAN) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_SYCL)
|
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUDA) || defined(GGML_USE_VULKAN) || defined(GGML_USE_SYCL)
|
||||||
return 1;
|
return 1;
|
||||||
#else
|
#else
|
||||||
return 0;
|
return 0;
|
||||||
@ -22874,14 +22827,6 @@ int ggml_cpu_has_cuda(void) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
int ggml_cpu_has_clblast(void) {
|
|
||||||
#if defined(GGML_USE_CLBLAST)
|
|
||||||
return 1;
|
|
||||||
#else
|
|
||||||
return 0;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
int ggml_cpu_has_vulkan(void) {
|
int ggml_cpu_has_vulkan(void) {
|
||||||
#if defined(GGML_USE_VULKAN)
|
#if defined(GGML_USE_VULKAN)
|
||||||
return 1;
|
return 1;
|
||||||
@ -22915,8 +22860,7 @@ int ggml_cpu_has_rpc(void) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int ggml_cpu_has_gpublas(void) {
|
int ggml_cpu_has_gpublas(void) {
|
||||||
return ggml_cpu_has_cuda() || ggml_cpu_has_clblast() || ggml_cpu_has_vulkan() || ggml_cpu_has_kompute() ||
|
return ggml_cpu_has_cuda() || ggml_cpu_has_vulkan() || ggml_cpu_has_kompute() || ggml_cpu_has_sycl();
|
||||||
ggml_cpu_has_sycl();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int ggml_cpu_has_sse3(void) {
|
int ggml_cpu_has_sse3(void) {
|
||||||
|
1
ggml.h
1
ggml.h
@ -2425,7 +2425,6 @@ extern "C" {
|
|||||||
GGML_API int ggml_cpu_has_wasm_simd (void);
|
GGML_API int ggml_cpu_has_wasm_simd (void);
|
||||||
GGML_API int ggml_cpu_has_blas (void);
|
GGML_API int ggml_cpu_has_blas (void);
|
||||||
GGML_API int ggml_cpu_has_cuda (void);
|
GGML_API int ggml_cpu_has_cuda (void);
|
||||||
GGML_API int ggml_cpu_has_clblast (void);
|
|
||||||
GGML_API int ggml_cpu_has_vulkan (void);
|
GGML_API int ggml_cpu_has_vulkan (void);
|
||||||
GGML_API int ggml_cpu_has_kompute (void);
|
GGML_API int ggml_cpu_has_kompute (void);
|
||||||
GGML_API int ggml_cpu_has_gpublas (void);
|
GGML_API int ggml_cpu_has_gpublas (void);
|
||||||
|
10
llama.cpp
10
llama.cpp
@ -13,8 +13,6 @@
|
|||||||
|
|
||||||
#ifdef GGML_USE_CUDA
|
#ifdef GGML_USE_CUDA
|
||||||
# include "ggml-cuda.h"
|
# include "ggml-cuda.h"
|
||||||
#elif defined(GGML_USE_CLBLAST)
|
|
||||||
# include "ggml-opencl.h"
|
|
||||||
#elif defined(GGML_USE_VULKAN)
|
#elif defined(GGML_USE_VULKAN)
|
||||||
# include "ggml-vulkan.h"
|
# include "ggml-vulkan.h"
|
||||||
#elif defined(GGML_USE_SYCL)
|
#elif defined(GGML_USE_SYCL)
|
||||||
@ -2406,8 +2404,6 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_offload(const llama_
|
|||||||
buft = ggml_backend_vk_buffer_type(gpu);
|
buft = ggml_backend_vk_buffer_type(gpu);
|
||||||
#elif defined(GGML_USE_SYCL)
|
#elif defined(GGML_USE_SYCL)
|
||||||
buft = ggml_backend_sycl_buffer_type(gpu);
|
buft = ggml_backend_sycl_buffer_type(gpu);
|
||||||
#elif defined(GGML_USE_CLBLAST)
|
|
||||||
buft = ggml_backend_opencl_buffer_type();
|
|
||||||
#elif defined(GGML_USE_KOMPUTE)
|
#elif defined(GGML_USE_KOMPUTE)
|
||||||
buft = ggml_backend_kompute_buffer_type(gpu);
|
buft = ggml_backend_kompute_buffer_type(gpu);
|
||||||
if (buft == nullptr) {
|
if (buft == nullptr) {
|
||||||
@ -2530,10 +2526,6 @@ static bool llama_kv_cache_init(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef GGML_USE_CLBLAST
|
|
||||||
offload = false;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// count used buffer types
|
// count used buffer types
|
||||||
std::map<ggml_backend_buffer_type_t, int> buft_layer_count;
|
std::map<ggml_backend_buffer_type_t, int> buft_layer_count;
|
||||||
if (offload) {
|
if (offload) {
|
||||||
@ -15921,7 +15913,7 @@ bool llama_supports_mlock(void) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool llama_supports_gpu_offload(void) {
|
bool llama_supports_gpu_offload(void) {
|
||||||
#if defined(GGML_USE_CUDA) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL) || defined(GGML_USE_VULKAN) || \
|
#if defined(GGML_USE_CUDA) || defined(GGML_USE_METAL) || defined(GGML_USE_VULKAN) || \
|
||||||
defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE) || defined(GGML_USE_RPC)
|
defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE) || defined(GGML_USE_RPC)
|
||||||
// Defined when llama.cpp is compiled with support for offloading model layers to GPU.
|
// Defined when llama.cpp is compiled with support for offloading model layers to GPU.
|
||||||
return true;
|
return true;
|
||||||
|
@ -5,7 +5,6 @@ set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@)
|
|||||||
set(LLAMA_BLAS @LLAMA_BLAS@)
|
set(LLAMA_BLAS @LLAMA_BLAS@)
|
||||||
set(LLAMA_CUDA @LLAMA_CUDA@)
|
set(LLAMA_CUDA @LLAMA_CUDA@)
|
||||||
set(LLAMA_METAL @LLAMA_METAL@)
|
set(LLAMA_METAL @LLAMA_METAL@)
|
||||||
set(LLAMA_CLBLAST @LLAMA_CLBLAST@)
|
|
||||||
set(LLAMA_HIPBLAS @LLAMA_HIPBLAS@)
|
set(LLAMA_HIPBLAS @LLAMA_HIPBLAS@)
|
||||||
set(LLAMA_ACCELERATE @LLAMA_ACCELERATE@)
|
set(LLAMA_ACCELERATE @LLAMA_ACCELERATE@)
|
||||||
|
|
||||||
@ -36,10 +35,6 @@ if (LLAMA_METAL)
|
|||||||
find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
|
find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (LLAMA_CLBLAST)
|
|
||||||
find_package(CLBlast REQUIRED)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (LLAMA_HIPBLAS)
|
if (LLAMA_HIPBLAS)
|
||||||
find_package(hip REQUIRED)
|
find_package(hip REQUIRED)
|
||||||
find_package(hipblas REQUIRED)
|
find_package(hipblas REQUIRED)
|
||||||
|
@ -19,17 +19,17 @@ logger = logging.getLogger("compare-llama-bench")
|
|||||||
|
|
||||||
# Properties by which to differentiate results per commit:
|
# Properties by which to differentiate results per commit:
|
||||||
KEY_PROPERTIES = [
|
KEY_PROPERTIES = [
|
||||||
"cpu_info", "gpu_info", "n_gpu_layers", "cuda", "opencl", "vulkan", "kompute", "metal", "sycl", "rpc", "gpu_blas",
|
"cpu_info", "gpu_info", "n_gpu_layers", "cuda", "vulkan", "kompute", "metal", "sycl", "rpc", "gpu_blas",
|
||||||
"blas", "model_filename", "model_type", "model_size", "model_n_params", "n_batch", "n_ubatch", "embeddings", "n_threads",
|
"blas", "model_filename", "model_type", "model_size", "model_n_params", "n_batch", "n_ubatch", "embeddings", "n_threads",
|
||||||
"type_k", "type_v", "use_mmap", "no_kv_offload", "split_mode", "main_gpu", "tensor_split", "flash_attn", "n_prompt", "n_gen"
|
"type_k", "type_v", "use_mmap", "no_kv_offload", "split_mode", "main_gpu", "tensor_split", "flash_attn", "n_prompt", "n_gen"
|
||||||
]
|
]
|
||||||
|
|
||||||
# Properties that are boolean and are converted to Yes/No for the table:
|
# Properties that are boolean and are converted to Yes/No for the table:
|
||||||
BOOL_PROPERTIES = ["cuda", "opencl", "vulkan", "kompute", "metal", "sycl", "gpu_blas", "blas", "embeddings", "use_mmap", "no_kv_offload", "flash_attn"]
|
BOOL_PROPERTIES = ["cuda", "vulkan", "kompute", "metal", "sycl", "gpu_blas", "blas", "embeddings", "use_mmap", "no_kv_offload", "flash_attn"]
|
||||||
|
|
||||||
# Header names for the table:
|
# Header names for the table:
|
||||||
PRETTY_NAMES = {
|
PRETTY_NAMES = {
|
||||||
"cuda": "CUDA", "opencl": "OpenCL", "vulkan": "Vulkan", "kompute": "Kompute", "metal": "Metal", "sycl": "SYCL", "rpc": "RPC",
|
"cuda": "CUDA", "vulkan": "Vulkan", "kompute": "Kompute", "metal": "Metal", "sycl": "SYCL", "rpc": "RPC",
|
||||||
"gpu_blas": "GPU BLAS", "blas": "BLAS", "cpu_info": "CPU", "gpu_info": "GPU", "model_filename": "File", "model_type": "Model",
|
"gpu_blas": "GPU BLAS", "blas": "BLAS", "cpu_info": "CPU", "gpu_info": "GPU", "model_filename": "File", "model_type": "Model",
|
||||||
"model_size": "Model Size [GiB]", "model_n_params": "Num. of Par.", "n_batch": "Batch size", "n_ubatch": "Microbatch size",
|
"model_size": "Model Size [GiB]", "model_n_params": "Num. of Par.", "n_batch": "Batch size", "n_ubatch": "Microbatch size",
|
||||||
"n_threads": "Threads", "type_k": "K type", "type_v": "V type", "n_gpu_layers": "GPU layers", "split_mode": "Split mode",
|
"n_threads": "Threads", "type_k": "K type", "type_v": "V type", "n_gpu_layers": "GPU layers", "split_mode": "Split mode",
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
# Helper script for deploying llama.cpp server with a single Bash command
|
# Helper script for deploying llama.cpp server with a single Bash command
|
||||||
#
|
#
|
||||||
# - Works on Linux and macOS
|
# - Works on Linux and macOS
|
||||||
# - Supports: CPU, CUDA, Metal, OpenCL
|
# - Supports: CPU, CUDA, Metal
|
||||||
# - Can run all GGUF models from HuggingFace
|
# - Can run all GGUF models from HuggingFace
|
||||||
# - Can serve requests in parallel
|
# - Can serve requests in parallel
|
||||||
# - Always builds latest llama.cpp from GitHub
|
# - Always builds latest llama.cpp from GitHub
|
||||||
@ -19,7 +19,7 @@
|
|||||||
# --port: port number, default is 8888
|
# --port: port number, default is 8888
|
||||||
# --repo: path to a repo containing GGUF model files
|
# --repo: path to a repo containing GGUF model files
|
||||||
# --wtype: weights type (f16, q8_0, q4_0, q4_1), default is user-input
|
# --wtype: weights type (f16, q8_0, q4_0, q4_1), default is user-input
|
||||||
# --backend: cpu, cuda, metal, opencl, depends on the OS
|
# --backend: cpu, cuda, metal, depends on the OS
|
||||||
# --gpu-id: gpu id, default is 0
|
# --gpu-id: gpu id, default is 0
|
||||||
# --n-parallel: number of parallel requests, default is 8
|
# --n-parallel: number of parallel requests, default is 8
|
||||||
# --n-kv: KV cache size, default is 4096
|
# --n-kv: KV cache size, default is 4096
|
||||||
@ -72,7 +72,7 @@ function print_usage {
|
|||||||
printf " --port: port number, default is 8888\n"
|
printf " --port: port number, default is 8888\n"
|
||||||
printf " --repo: path to a repo containing GGUF model files\n"
|
printf " --repo: path to a repo containing GGUF model files\n"
|
||||||
printf " --wtype: weights type (f16, q8_0, q4_0, q4_1), default is user-input\n"
|
printf " --wtype: weights type (f16, q8_0, q4_0, q4_1), default is user-input\n"
|
||||||
printf " --backend: cpu, cuda, metal, opencl, depends on the OS\n"
|
printf " --backend: cpu, cuda, metal, depends on the OS\n"
|
||||||
printf " --gpu-id: gpu id, default is 0\n"
|
printf " --gpu-id: gpu id, default is 0\n"
|
||||||
printf " --n-parallel: number of parallel requests, default is 8\n"
|
printf " --n-parallel: number of parallel requests, default is 8\n"
|
||||||
printf " --n-kv: KV cache size, default is 4096\n"
|
printf " --n-kv: KV cache size, default is 4096\n"
|
||||||
@ -387,9 +387,6 @@ elif [[ "$backend" == "cpu" ]]; then
|
|||||||
elif [[ "$backend" == "metal" ]]; then
|
elif [[ "$backend" == "metal" ]]; then
|
||||||
printf "[+] Building with Metal backend\n"
|
printf "[+] Building with Metal backend\n"
|
||||||
make -j server $log
|
make -j server $log
|
||||||
elif [[ "$backend" == "opencl" ]]; then
|
|
||||||
printf "[+] Building with OpenCL backend\n"
|
|
||||||
LLAMA_CLBLAST=1 make -j server $log
|
|
||||||
else
|
else
|
||||||
printf "[-] Unknown backend: %s\n" "$backend"
|
printf "[-] Unknown backend: %s\n" "$backend"
|
||||||
exit 1
|
exit 1
|
||||||
@ -407,8 +404,6 @@ elif [[ "$backend" == "cpu" ]]; then
|
|||||||
args="-ngl 0"
|
args="-ngl 0"
|
||||||
elif [[ "$backend" == "metal" ]]; then
|
elif [[ "$backend" == "metal" ]]; then
|
||||||
args="-ngl 999"
|
args="-ngl 999"
|
||||||
elif [[ "$backend" == "opencl" ]]; then
|
|
||||||
args="-ngl 999"
|
|
||||||
else
|
else
|
||||||
printf "[-] Unknown backend: %s\n" "$backend"
|
printf "[-] Unknown backend: %s\n" "$backend"
|
||||||
exit 1
|
exit 1
|
||||||
|
@ -106,8 +106,6 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
|
|||||||
# src/ggml-kompute.h -> ggml-kompute.h
|
# src/ggml-kompute.h -> ggml-kompute.h
|
||||||
# src/ggml-metal.h -> ggml-metal.h
|
# src/ggml-metal.h -> ggml-metal.h
|
||||||
# src/ggml-metal.m -> ggml-metal.m
|
# src/ggml-metal.m -> ggml-metal.m
|
||||||
# src/ggml-opencl.cpp -> ggml-opencl.cpp
|
|
||||||
# src/ggml-opencl.h -> ggml-opencl.h
|
|
||||||
# src/ggml-quants.c -> ggml-quants.c
|
# src/ggml-quants.c -> ggml-quants.c
|
||||||
# src/ggml-quants.h -> ggml-quants.h
|
# src/ggml-quants.h -> ggml-quants.h
|
||||||
# src/ggml-rpc.cpp -> ggml-rpc.cpp
|
# src/ggml-rpc.cpp -> ggml-rpc.cpp
|
||||||
@ -143,8 +141,6 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
|
|||||||
-e 's/src\/ggml-kompute\.h/ggml-kompute.h/g' \
|
-e 's/src\/ggml-kompute\.h/ggml-kompute.h/g' \
|
||||||
-e 's/src\/ggml-metal\.h/ggml-metal.h/g' \
|
-e 's/src\/ggml-metal\.h/ggml-metal.h/g' \
|
||||||
-e 's/src\/ggml-metal\.m/ggml-metal.m/g' \
|
-e 's/src\/ggml-metal\.m/ggml-metal.m/g' \
|
||||||
-e 's/src\/ggml-opencl\.cpp/ggml-opencl.cpp/g' \
|
|
||||||
-e 's/src\/ggml-opencl\.h/ggml-opencl.h/g' \
|
|
||||||
-e 's/src\/ggml-quants\.c/ggml-quants.c/g' \
|
-e 's/src\/ggml-quants\.c/ggml-quants.c/g' \
|
||||||
-e 's/src\/ggml-quants\.h/ggml-quants.h/g' \
|
-e 's/src\/ggml-quants\.h/ggml-quants.h/g' \
|
||||||
-e 's/src\/ggml-rpc\.cpp/ggml-rpc.cpp/g' \
|
-e 's/src\/ggml-rpc\.cpp/ggml-rpc.cpp/g' \
|
||||||
|
@ -14,8 +14,6 @@ cp -rpv ../ggml/src/ggml-kompute.h ./ggml-kompute.h
|
|||||||
cp -rpv ../ggml/src/ggml-metal.h ./ggml-metal.h
|
cp -rpv ../ggml/src/ggml-metal.h ./ggml-metal.h
|
||||||
cp -rpv ../ggml/src/ggml-metal.m ./ggml-metal.m
|
cp -rpv ../ggml/src/ggml-metal.m ./ggml-metal.m
|
||||||
cp -rpv ../ggml/src/ggml-metal.metal ./ggml-metal.metal
|
cp -rpv ../ggml/src/ggml-metal.metal ./ggml-metal.metal
|
||||||
cp -rpv ../ggml/src/ggml-opencl.cpp ./ggml-opencl.cpp
|
|
||||||
cp -rpv ../ggml/src/ggml-opencl.h ./ggml-opencl.h
|
|
||||||
cp -rpv ../ggml/src/ggml-quants.c ./ggml-quants.c
|
cp -rpv ../ggml/src/ggml-quants.c ./ggml-quants.c
|
||||||
cp -rpv ../ggml/src/ggml-quants.h ./ggml-quants.h
|
cp -rpv ../ggml/src/ggml-quants.h ./ggml-quants.h
|
||||||
cp -rpv ../ggml/src/ggml-rpc.cpp ./ggml-rpc.cpp
|
cp -rpv ../ggml/src/ggml-rpc.cpp ./ggml-rpc.cpp
|
||||||
|
Loading…
Reference in New Issue
Block a user