diff --git a/.devops/full-cuda.Dockerfile b/.devops/full-cuda.Dockerfile
index f6073f662..2a7da586a 100644
--- a/.devops/full-cuda.Dockerfile
+++ b/.devops/full-cuda.Dockerfile
@@ -27,7 +27,7 @@ COPY . .
 # Set nvcc architecture
 ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
 # Enable CUDA
-ENV LLAMA_CUDA=1
+ENV GGML_CUDA=1
 # Enable cURL
 ENV LLAMA_CURL=1
 
diff --git a/.devops/full-rocm.Dockerfile b/.devops/full-rocm.Dockerfile
index 0314d469b..5cbd2e7a1 100644
--- a/.devops/full-rocm.Dockerfile
+++ b/.devops/full-rocm.Dockerfile
@@ -36,7 +36,7 @@ COPY . .
 # Set nvcc architecture
 ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
 # Enable ROCm
-ENV LLAMA_HIPBLAS=1
+ENV GGML_HIPBLAS=1
 ENV CC=/opt/rocm/llvm/bin/clang
 ENV CXX=/opt/rocm/llvm/bin/clang++
 
diff --git a/.devops/llama-cli-cuda.Dockerfile b/.devops/llama-cli-cuda.Dockerfile
index d5ce538f6..bff946cbc 100644
--- a/.devops/llama-cli-cuda.Dockerfile
+++ b/.devops/llama-cli-cuda.Dockerfile
@@ -21,7 +21,7 @@ COPY . .
 # Set nvcc architecture
 ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
 # Enable CUDA
-ENV LLAMA_CUDA=1
+ENV GGML_CUDA=1
 
 RUN make -j$(nproc) llama-cli
 
diff --git a/.devops/llama-cli-intel.Dockerfile b/.devops/llama-cli-intel.Dockerfile
index 6789e17af..bd816f9f5 100644
--- a/.devops/llama-cli-intel.Dockerfile
+++ b/.devops/llama-cli-intel.Dockerfile
@@ -2,7 +2,7 @@ ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04
 
 FROM intel/oneapi-basekit:$ONEAPI_VERSION as build
 
-ARG LLAMA_SYCL_F16=OFF
+ARG GGML_SYCL_F16=OFF
 
 RUN apt-get update && \
     apt-get install -y git
@@ -10,11 +10,11 @@ WORKDIR /app
 
 COPY . .
 
-RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
-    echo "LLAMA_SYCL_F16 is set" && \
-    export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
+RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
+    echo "GGML_SYCL_F16 is set" && \
+    export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
     fi && \
-    cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
+    cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
     cmake --build build --config Release --target llama-cli
 
 FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
diff --git a/.devops/llama-cli-rocm.Dockerfile b/.devops/llama-cli-rocm.Dockerfile
index 7e8a6f0fa..caa507b08 100644
--- a/.devops/llama-cli-rocm.Dockerfile
+++ b/.devops/llama-cli-rocm.Dockerfile
@@ -36,7 +36,7 @@ COPY . .
 # Set nvcc architecture
 ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
 # Enable ROCm
-ENV LLAMA_HIPBLAS=1
+ENV GGML_HIPBLAS=1
 ENV CC=/opt/rocm/llvm/bin/clang
 ENV CXX=/opt/rocm/llvm/bin/clang++
 
diff --git a/.devops/llama-cli-vulkan.Dockerfile b/.devops/llama-cli-vulkan.Dockerfile
index 7a0abe71f..6155d5881 100644
--- a/.devops/llama-cli-vulkan.Dockerfile
+++ b/.devops/llama-cli-vulkan.Dockerfile
@@ -14,7 +14,7 @@ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key
 # Build it
 WORKDIR /app
 COPY . .
-RUN cmake -B build -DLLAMA_VULKAN=1 && \
+RUN cmake -B build -DGGML_VULKAN=1 && \
     cmake --build build --config Release --target llama-cli
 
 # Clean up
diff --git a/.devops/llama-cpp-clblast.srpm.spec b/.devops/llama-cpp-clblast.srpm.spec
deleted file mode 100644
index 013952191..000000000
--- a/.devops/llama-cpp-clblast.srpm.spec
+++ /dev/null
@@ -1,84 +0,0 @@
-# SRPM for building from source and packaging an RPM for RPM-based distros.
-# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
-# Built and maintained by John Boero - boeroboy@gmail.com
-# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
-
-# Notes for llama.cpp:
-# 1. Tags are currently based on hash - which will not sort asciibetically.
-#    We need to declare standard versioning if people want to sort latest releases.
-# 2. Builds for CUDA/OpenCL support are separate, with different depenedencies.
-# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
-#    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
-# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
-#    It is up to the user to install the correct vendor-specific support.
-
-Name:           llama.cpp-clblast
-Version:        %( date "+%%Y%%m%%d" )
-Release:        1%{?dist}
-Summary:        OpenCL Inference of LLaMA model in C/C++
-License:        MIT
-Source0:        https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
-BuildRequires:  coreutils make gcc-c++ git mesa-libOpenCL-devel clblast-devel
-Requires:       clblast
-URL:            https://github.com/ggerganov/llama.cpp
-
-%define debug_package %{nil}
-%define source_date_epoch_from_changelog 0
-
-%description
-CPU inference for Meta's Lllama2 models using default options.
-
-%prep
-%setup -n llama.cpp-master
-
-%build
-make -j LLAMA_CLBLAST=1
-
-%install
-mkdir -p %{buildroot}%{_bindir}/
-cp -p llama-cli %{buildroot}%{_bindir}/llama-clblast-cli
-cp -p llama-server %{buildroot}%{_bindir}/llama-clblast-server
-cp -p llama-simple %{buildroot}%{_bindir}/llama-clblast-simple
-
-mkdir -p %{buildroot}/usr/lib/systemd/system
-%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamaclblast.service
-[Unit]
-Description=Llama.cpp server, CPU only (no GPU support in this build).
-After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
-
-[Service]
-Type=simple
-EnvironmentFile=/etc/sysconfig/llama
-ExecStart=/usr/bin/llama-clblast-server $LLAMA_ARGS
-ExecReload=/bin/kill -s HUP $MAINPID
-Restart=never
-
-[Install]
-WantedBy=default.target
-EOF
-
-mkdir -p %{buildroot}/etc/sysconfig
-%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
-LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
-EOF
-
-%clean
-rm -rf %{buildroot}
-rm -rf %{_builddir}/*
-
-%files
-%{_bindir}/llama-clblast-cli
-%{_bindir}/llama-clblast-server
-%{_bindir}/llama-clblast-simple
-/usr/lib/systemd/system/llamaclblast.service
-%config /etc/sysconfig/llama
-
-
-%pre
-
-%post
-
-%preun
-%postun
-
-%changelog
diff --git a/.devops/llama-cpp-cuda.srpm.spec b/.devops/llama-cpp-cuda.srpm.spec
index cbdf43626..7425d3a9d 100644
--- a/.devops/llama-cpp-cuda.srpm.spec
+++ b/.devops/llama-cpp-cuda.srpm.spec
@@ -32,7 +32,7 @@ CPU inference for Meta's Lllama2 models using default options.
 %setup -n llama.cpp-master
 
 %build
-make -j LLAMA_CUDA=1
+make -j GGML_CUDA=1
 
 %install
 mkdir -p %{buildroot}%{_bindir}/
diff --git a/.devops/llama-server-cuda.Dockerfile b/.devops/llama-server-cuda.Dockerfile
index 7bef07a05..d7eaa0925 100644
--- a/.devops/llama-server-cuda.Dockerfile
+++ b/.devops/llama-server-cuda.Dockerfile
@@ -21,7 +21,7 @@ COPY . .
 # Set nvcc architecture
 ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
 # Enable CUDA
-ENV LLAMA_CUDA=1
+ENV GGML_CUDA=1
 # Enable cURL
 ENV LLAMA_CURL=1
 
diff --git a/.devops/llama-server-intel.Dockerfile b/.devops/llama-server-intel.Dockerfile
index 3bf1670ec..8f8fef8c0 100644
--- a/.devops/llama-server-intel.Dockerfile
+++ b/.devops/llama-server-intel.Dockerfile
@@ -2,7 +2,7 @@ ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04
 
 FROM intel/oneapi-basekit:$ONEAPI_VERSION as build
 
-ARG LLAMA_SYCL_F16=OFF
+ARG GGML_SYCL_F16=OFF
 
 RUN apt-get update && \
     apt-get install -y git libcurl4-openssl-dev
@@ -10,11 +10,11 @@ WORKDIR /app
 
 COPY . .
 
-RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
-    echo "LLAMA_SYCL_F16 is set" && \
-    export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
+RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
+    echo "GGML_SYCL_F16 is set" && \
+    export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
     fi && \
-    cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
+    cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
     cmake --build build --config Release --target llama-server
 
 FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
diff --git a/.devops/llama-server-rocm.Dockerfile b/.devops/llama-server-rocm.Dockerfile
index 4b1cdc320..af96c3325 100644
--- a/.devops/llama-server-rocm.Dockerfile
+++ b/.devops/llama-server-rocm.Dockerfile
@@ -36,7 +36,7 @@ COPY . .
 # Set nvcc architecture
 ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
 # Enable ROCm
-ENV LLAMA_HIPBLAS=1
+ENV GGML_HIPBLAS=1
 ENV CC=/opt/rocm/llvm/bin/clang
 ENV CXX=/opt/rocm/llvm/bin/clang++
 
diff --git a/.devops/llama-server-vulkan.Dockerfile b/.devops/llama-server-vulkan.Dockerfile
index 2bc2e45d3..49062f84b 100644
--- a/.devops/llama-server-vulkan.Dockerfile
+++ b/.devops/llama-server-vulkan.Dockerfile
@@ -14,7 +14,7 @@ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key
 # Build it
 WORKDIR /app
 COPY . .
-RUN cmake -B build -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
+RUN cmake -B build -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
     cmake --build build --config Release --target llama-server
 
 # Clean up
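
The pattern throughout these hunks is that ggml backend toggles (CUDA, HIPBLAS, SYCL, Vulkan) move from the LLAMA_ prefix to the GGML_ prefix, while llama.cpp-level options such as LLAMA_CURL keep their names. A minimal build sketch using only flags that appear in the hunks above; the flag/target combinations are illustrative, not a canonical build recipe:

    # Makefile builds (as in the CUDA/ROCm Dockerfiles and the srpm spec):
    make -j GGML_CUDA=1                      # was: make -j LLAMA_CUDA=1
    # or via the environment, as the Dockerfiles do:
    #   ENV GGML_CUDA=1   followed by   make -j$(nproc) llama-cli

    # CMake builds (as in the Vulkan/SYCL Dockerfiles); note LLAMA_CURL is unchanged:
    cmake -B build -DGGML_VULKAN=1 -DLLAMA_CURL=1        # was: -DLLAMA_VULKAN=1
    cmake --build build --config Release --target llama-server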