diff --git a/.devops/llama-cli-cuda.Dockerfile b/.devops/llama-cli-cuda.Dockerfile index b75163b94..3279c8da4 100644 --- a/.devops/llama-cli-cuda.Dockerfile +++ b/.devops/llama-cli-cuda.Dockerfile @@ -23,15 +23,16 @@ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \ export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \ fi && \ cmake -B build -DGGML_CUDA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ - cmake --build build --config Release --target llama-cli -j$(nproc) + cmake --build build --config Release --target llama-cli -j$(nproc) && \ + mkdir -p /app/lib && \ + find build -name "*.so" -exec cp {} /app/lib \; FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime RUN apt-get update && \ apt-get install -y libgomp1 -COPY --from=build /app/build/ggml/src/libggml.so /libggml.so -COPY --from=build /app/build/src/libllama.so /libllama.so -COPY --from=build /app/build/bin/llama-cli /llama-cli +COPY --from=build /app/lib/ / +COPY --from=build /app/build/bin/llama-cli / ENTRYPOINT [ "/llama-cli" ] diff --git a/.devops/llama-cli-musa.Dockerfile b/.devops/llama-cli-musa.Dockerfile index b5696794f..1edf75cf2 100644 --- a/.devops/llama-cli-musa.Dockerfile +++ b/.devops/llama-cli-musa.Dockerfile @@ -16,15 +16,16 @@ WORKDIR /app COPY . . RUN cmake -B build -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ - cmake --build build --config Release --target llama-cli -j$(nproc) + cmake --build build --config Release --target llama-cli -j$(nproc) && \ + mkdir -p /app/lib && \ + find build -name "*.so" -exec cp {} /app/lib \; FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime RUN apt-get update && \ apt-get install -y libgomp1 -COPY --from=build /app/build/ggml/src/libggml.so /libggml.so -COPY --from=build /app/build/src/libllama.so /libllama.so +COPY --from=build /app/lib/ / COPY --from=build /app/build/bin/llama-cli /llama-cli ENTRYPOINT [ "/llama-cli" ] diff --git a/.devops/llama-server-cuda.Dockerfile b/.devops/llama-server-cuda.Dockerfile index a40e24205..ea07a4d52 100644 --- a/.devops/llama-server-cuda.Dockerfile +++ b/.devops/llama-server-cuda.Dockerfile @@ -23,15 +23,16 @@ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \ export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \ fi && \ cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ - cmake --build build --config Release --target llama-server -j$(nproc) + cmake --build build --config Release --target llama-server -j$(nproc) && \ + mkdir -p /app/lib && \ + find build -name "*.so" -exec cp {} /app/lib \; FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime RUN apt-get update && \ apt-get install -y libcurl4-openssl-dev libgomp1 curl -COPY --from=build /app/build/ggml/src/libggml.so /libggml.so -COPY --from=build /app/build/src/libllama.so /libllama.so +COPY --from=build /app/lib/ / COPY --from=build /app/build/bin/llama-server /llama-server # Must be set to 0.0.0.0 so it can listen to requests from host machine diff --git a/.devops/llama-server-musa.Dockerfile b/.devops/llama-server-musa.Dockerfile index 193a6d77c..259877468 100644 --- a/.devops/llama-server-musa.Dockerfile +++ b/.devops/llama-server-musa.Dockerfile @@ -16,15 +16,16 @@ WORKDIR /app COPY . . RUN cmake -B build -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ - cmake --build build --config Release --target llama-server -j$(nproc) + cmake --build build --config Release --target llama-server -j$(nproc) && \ + mkdir -p /app/lib && \ + find build -name "*.so" -exec cp {} /app/lib \; FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime RUN apt-get update && \ apt-get install -y libcurl4-openssl-dev libgomp1 curl -COPY --from=build /app/build/ggml/src/libggml.so /libggml.so -COPY --from=build /app/build/src/libllama.so /libllama.so +COPY --from=build /app/lib/ / COPY --from=build /app/build/bin/llama-server /llama-server # Must be set to 0.0.0.0 so it can listen to requests from host machine