diff --git a/.devops/full-cuda.Dockerfile b/.devops/full-cuda.Dockerfile index 8cc1480d3..059fd2695 100644 --- a/.devops/full-cuda.Dockerfile +++ b/.devops/full-cuda.Dockerfile @@ -12,7 +12,7 @@ FROM ${BASE_CUDA_DEV_CONTAINER} as build ARG CUDA_DOCKER_ARCH=all RUN apt-get update && \ - apt-get install -y build-essential python3 python3-pip git + apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev COPY requirements.txt requirements.txt COPY requirements requirements @@ -28,6 +28,8 @@ COPY . . ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} # Enable CUDA ENV LLAMA_CUDA=1 +# Enable cURL +ENV LLAMA_CURL=1 RUN make diff --git a/.devops/full-rocm.Dockerfile b/.devops/full-rocm.Dockerfile index 8b9633dc4..6ecf3bcc7 100644 --- a/.devops/full-rocm.Dockerfile +++ b/.devops/full-rocm.Dockerfile @@ -40,6 +40,11 @@ ENV LLAMA_HIPBLAS=1 ENV CC=/opt/rocm/llvm/bin/clang ENV CXX=/opt/rocm/llvm/bin/clang++ +# Enable cURL +ENV LLAMA_CURL=1 +RUN apt-get update && \ + apt-get install -y libcurl4-openssl-dev + RUN make ENTRYPOINT ["/app/.devops/tools.sh"] diff --git a/.devops/full.Dockerfile b/.devops/full.Dockerfile index cef1297d3..432fb5dad 100644 --- a/.devops/full.Dockerfile +++ b/.devops/full.Dockerfile @@ -3,7 +3,7 @@ ARG UBUNTU_VERSION=22.04 FROM ubuntu:$UBUNTU_VERSION as build RUN apt-get update && \ - apt-get install -y build-essential python3 python3-pip git + apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev COPY requirements.txt requirements.txt COPY requirements requirements @@ -15,6 +15,9 @@ WORKDIR /app COPY . . +ENV LLAMA_CURL=1 + + RUN make ENV LC_ALL=C.utf8 diff --git a/.devops/server-cuda.Dockerfile b/.devops/server-cuda.Dockerfile index 5683a3646..59a52ba21 100644 --- a/.devops/server-cuda.Dockerfile +++ b/.devops/server-cuda.Dockerfile @@ -12,7 +12,7 @@ FROM ${BASE_CUDA_DEV_CONTAINER} as build ARG CUDA_DOCKER_ARCH=all RUN apt-get update && \ - apt-get install -y build-essential git + apt-get install -y build-essential git libcurl4-openssl-dev WORKDIR /app @@ -22,11 +22,16 @@ COPY . . ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} # Enable CUDA ENV LLAMA_CUDA=1 +# Enable cURL +ENV LLAMA_CURL=1 RUN make FROM ${BASE_CUDA_RUN_CONTAINER} as runtime +RUN apt-get update && \ + apt-get install -y libcurl4-openssl-dev + COPY --from=build /app/server /server ENTRYPOINT [ "/server" ] diff --git a/.devops/server-intel.Dockerfile b/.devops/server-intel.Dockerfile index 312f2df80..304487335 100644 --- a/.devops/server-intel.Dockerfile +++ b/.devops/server-intel.Dockerfile @@ -4,7 +4,7 @@ FROM intel/oneapi-basekit:$ONEAPI_VERSION as build ARG LLAMA_SYCL_F16=OFF RUN apt-get update && \ - apt-get install -y git + apt-get install -y git libcurl4-openssl-dev WORKDIR /app @@ -16,11 +16,14 @@ RUN mkdir build && \ echo "LLAMA_SYCL_F16 is set" && \ export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \ fi && \ - cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \ + cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \ cmake --build . --config Release --target server FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime +RUN apt-get update && \ + apt-get install -y libcurl4-openssl-dev + COPY --from=build /app/build/bin/server /server ENV LC_ALL=C.utf8 diff --git a/.devops/server-rocm.Dockerfile b/.devops/server-rocm.Dockerfile index e9a31647c..c02a31dd8 100644 --- a/.devops/server-rocm.Dockerfile +++ b/.devops/server-rocm.Dockerfile @@ -40,6 +40,11 @@ ENV LLAMA_HIPBLAS=1 ENV CC=/opt/rocm/llvm/bin/clang ENV CXX=/opt/rocm/llvm/bin/clang++ +# Enable cURL +ENV LLAMA_CURL=1 +RUN apt-get update && \ + apt-get install -y libcurl4-openssl-dev + RUN make ENTRYPOINT [ "/app/server" ] diff --git a/.devops/server-vulkan.Dockerfile b/.devops/server-vulkan.Dockerfile index e0add6fc3..7e5a5283b 100644 --- a/.devops/server-vulkan.Dockerfile +++ b/.devops/server-vulkan.Dockerfile @@ -11,12 +11,16 @@ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key apt update -y && \ apt-get install -y vulkan-sdk +# Install cURL +RUN apt-get update && \ + apt-get install -y libcurl4-openssl-dev + # Build it WORKDIR /app COPY . . RUN mkdir build && \ cd build && \ - cmake .. -DLLAMA_VULKAN=1 && \ + cmake .. -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \ cmake --build . --config Release --target server # Clean up diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 59fcfc27f..758796632 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -24,15 +24,15 @@ on: push: branches: - master - paths: ['.github/workflows/bench.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/bench/**.*'] + paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp'] pull_request_target: types: [opened, synchronize, reopened] - paths: ['.github/workflows/bench.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/bench/**.*'] + paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp'] schedule: - cron: '04 2 * * *' concurrency: - group: ${{ github.workflow }}-${{ github.ref }} + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}-${{ github.event.inputs.sha }} cancel-in-progress: true jobs: @@ -42,6 +42,16 @@ jobs: RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME Do not find a way to not duplicate it N_USERS: 8 DURATION: 10m + + strategy: + matrix: + model: [phi-2] + ftype: [q4_0, q8_0, f16] + include: + - model: phi-2 + ftype: q4_0 + pr_comment_enabled: "true" + if: ${{ github.event.inputs.gpu-series == 'Standard_NC4as_T4_v3' || github.event.schedule || github.event.pull_request || github.head_ref == 'master' || github.ref_name == 'master' || github.event.push.ref == 'refs/heads/master' }} steps: - name: Clone @@ -69,12 +79,18 @@ jobs: sleep 0.1 done - - name: Install k6 + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.21' + + - name: Install k6 and xk6-sse id: k6_installation run: | cd examples/server/bench - wget --quiet https://github.com/grafana/k6/releases/download/v0.49.0/k6-v0.49.0-linux-amd64.tar.gz - tar xzf k6*.tar.gz --strip-components=1 + go install go.k6.io/xk6/cmd/xk6@latest + xk6 build master \ + --with github.com/phymbert/xk6-sse - name: Build id: cmake_build @@ -108,7 +124,7 @@ jobs: cd examples/server/bench source venv/bin/activate - BENCH_K6_BIN_PATH=./k6 python bench.py \ + python bench.py \ --runner-label ${{ env.RUNNER_LABEL }} \ --name ${{ github.job }} \ --branch ${{ github.head_ref || github.ref_name }} \ @@ -116,7 +132,7 @@ jobs: --scenario script.js \ --duration ${{ github.event.inputs.duration || env.DURATION }} \ --hf-repo ggml-org/models \ - --hf-file phi-2/ggml-model-q4_0.gguf \ + --hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \ --model-path-prefix /models \ --parallel ${{ env.N_USERS }} \ -ngl 33 \ @@ -134,7 +150,7 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: benchmark-results + name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }} compression-level: 9 path: | examples/server/bench/*.jpg @@ -146,7 +162,7 @@ jobs: with: authToken: ${{secrets.GITHUB_TOKEN}} sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }} - context: bench-server-baseline + context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }} description: | ${{ env.BENCH_RESULTS }} state: 'success' @@ -203,21 +219,26 @@ jobs: - name: Comment PR uses: mshick/add-pr-comment@v2 id: comment_pr - if: ${{ github.event.pull_request != '' }} + if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }} with: - message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }} + message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }} message: | - 📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_: **${{ env.BENCH_ITERATIONS}} iterations** 🚀 +
- - Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }} - - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(90)=${{ env.HTTP_REQ_DURATION_P_90_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }} - - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_TOKENS_AVG }}tk/s p(90)=${{ env.LLAMACPP_PROMPT_TOKENS_P_90_ }}tk/s **total=${{ env.LLAMACPP_PROMPT_TOKENS_TOTAL_COUNTER_RATE }}tk/s** - - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(90)=${{ env.LLAMACPP_TOKENS_SECOND_P_90_ }}tk/s **total=${{ env.LLAMACPP_COMPLETION_TOKENS_TOTAL_COUNTER_RATE }}tk/s** - - ${{ env.BENCH_GRAPH_XLABEL }} + 📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀 + +
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index f12b3a138..ff7238aba 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -16,7 +16,7 @@ on:
paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m']
concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
env:
@@ -78,8 +78,8 @@ jobs:
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
uses: actions/upload-artifact@v4
with:
- path: |
- llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip
+ path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip
+ name: llama-bin-macos-arm64.zip
macOS-latest-cmake-x64:
runs-on: macos-latest
@@ -134,8 +134,8 @@ jobs:
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
uses: actions/upload-artifact@v4
with:
- path: |
- llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
+ path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
+ name: llama-bin-macos-x64.zip
ubuntu-focal-make:
runs-on: ubuntu-20.04
@@ -725,8 +725,8 @@ jobs:
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
uses: actions/upload-artifact@v4
with:
- path: |
- llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-x64.zip
+ path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-x64.zip
+ name: llama-bin-win-${{ matrix.build }}-x64.zip
windows-latest-cmake-cuda:
runs-on: windows-latest
@@ -781,8 +781,8 @@ jobs:
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
uses: actions/upload-artifact@v4
with:
- path: |
- llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip
+ path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip
+ name: llama-bin-win-cu${{ matrix.cuda }}-x64.zip
- name: Copy and pack Cuda runtime
run: |
@@ -795,8 +795,8 @@ jobs:
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
uses: actions/upload-artifact@v4
with:
- path: |
- cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
+ path: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
+ name: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
windows-latest-cmake-sycl:
runs-on: windows-latest
@@ -846,8 +846,8 @@ jobs:
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
uses: actions/upload-artifact@v4
with:
- path: |
- llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip
+ path: llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip
+ name: llama-bin-win-sycl-x64.zip
ios-xcode-build:
runs-on: macos-latest
diff --git a/.github/workflows/code-coverage.yml b/.github/workflows/code-coverage.yml
index a1cba103b..f12c558f8 100644
--- a/.github/workflows/code-coverage.yml
+++ b/.github/workflows/code-coverage.yml
@@ -6,7 +6,7 @@ env:
GGML_N_THREADS: 1
concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
jobs:
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index 4c5c87efc..eefd87878 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -16,7 +16,7 @@ on:
- master
concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
jobs:
diff --git a/.github/workflows/editorconfig.yml b/.github/workflows/editorconfig.yml
index 9f4c365f2..ae86e9927 100644
--- a/.github/workflows/editorconfig.yml
+++ b/.github/workflows/editorconfig.yml
@@ -15,7 +15,7 @@ on:
- master
concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
jobs:
diff --git a/.github/workflows/nix-ci-aarch64.yml b/.github/workflows/nix-ci-aarch64.yml
index 109a793ea..4aa4b2379 100644
--- a/.github/workflows/nix-ci-aarch64.yml
+++ b/.github/workflows/nix-ci-aarch64.yml
@@ -18,7 +18,7 @@ on:
paths: ['**/*.nix', 'flake.lock']
concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
jobs:
diff --git a/.github/workflows/nix-ci.yml b/.github/workflows/nix-ci.yml
index 8b5b99c8f..8955f38d0 100644
--- a/.github/workflows/nix-ci.yml
+++ b/.github/workflows/nix-ci.yml
@@ -9,7 +9,7 @@ on:
types: [opened, synchronize, reopened]
concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
jobs:
diff --git a/.github/workflows/python-check-requirements.yml b/.github/workflows/python-check-requirements.yml
index 4adb28268..4e0374fc6 100644
--- a/.github/workflows/python-check-requirements.yml
+++ b/.github/workflows/python-check-requirements.yml
@@ -17,7 +17,7 @@ on:
- 'requirements/*.txt'
concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
jobs:
diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml
index 7fd514231..f4ae65495 100644
--- a/.github/workflows/python-lint.yml
+++ b/.github/workflows/python-lint.yml
@@ -3,7 +3,7 @@ name: flake8 Lint
on: [push, pull_request]
concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
jobs:
diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml
index ccea38e04..521cc29ae 100644
--- a/.github/workflows/server.yml
+++ b/.github/workflows/server.yml
@@ -23,7 +23,7 @@ on:
- cron: '2 4 * * *'
concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
jobs:
diff --git a/.github/workflows/zig-build.yml b/.github/workflows/zig-build.yml
index 658ca8da6..747c35cc0 100644
--- a/.github/workflows/zig-build.yml
+++ b/.github/workflows/zig-build.yml
@@ -7,7 +7,7 @@ on:
- master
concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
jobs:
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 000000000..b029f13da
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,655 @@
+# date: Tue Apr 9 09:17:14 EEST 2024
+# this file is auto-generated by scripts/gen-authors.sh
+
+0cc4m