mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 02:44:36 +00:00
flake.nix : rewrite (#4605)
* flake.lock: update to hotfix CUDA::cuda_driver Required to support https://github.com/ggerganov/llama.cpp/pull/4606 * flake.nix: rewrite 1. Split into separate files per output. 2. Added overlays, so that this flake can be integrated into others. The names in the overlay are `llama-cpp`, `llama-cpp-opencl`, `llama-cpp-cuda`, and `llama-cpp-rocm` so that they fit into the broader set of Nix packages from [nixpkgs](https://github.com/nixos/nixpkgs). 3. Use [callPackage](https://summer.nixos.org/blog/callpackage-a-tool-for-the-lazy/) rather than `with pkgs;` so that there's dependency injection rather than dependency lookup. 4. Add a description and meta information for each package. The description includes a bit about what's trying to accelerate each one. 5. Use specific CUDA packages instead of cudatoolkit on the advice of SomeoneSerge. 6. Format with `serokell/nixfmt` for a consistent style. 7. Update `flake.lock` with the latest goods. * flake.nix: use finalPackage instead of passing it manually * nix: unclutter darwin support * nix: pass most darwin frameworks unconditionally ...for simplicity * *.nix: nixfmt nix shell github:piegamesde/nixfmt/rfc101-style --command \ nixfmt flake.nix .devops/nix/*.nix * flake.nix: add maintainers * nix: move meta down to follow Nixpkgs style more closely * nix: add missing meta attributes nix: clarify the interpretation of meta.maintainers nix: clarify the meaning of "broken" and "badPlatforms" nix: passthru: expose the use* flags for inspection E.g.: ``` ❯ nix eval .#cuda.useCuda true ``` * flake.nix: avoid re-evaluating nixpkgs too many times * flake.nix: use flake-parts * nix: migrate to pname+version * flake.nix: overlay: expose both the namespace and the default attribute * ci: add the (Nix) flakestry workflow * nix: cmakeFlags: explicit OFF bools * nix: cuda: reduce runtime closure * nix: fewer rebuilds * nix: respect config.cudaCapabilities * nix: add the impure driver's location to the DT_RUNPATHs * nix: clean sources 
more thoroughly ...this way outPaths change less frequently, and so there are fewer rebuilds * nix: explicit mpi support * nix: explicit jetson support * flake.nix: darwin: only expose the default --------- Co-authored-by: Someone Serge <sergei.kozlukov@aalto.fi>
This commit is contained in:
parent
97bbca6e85
commit
68eccbdc5b
22
.devops/nix/apps.nix
Normal file
22
.devops/nix/apps.nix
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
{
  perSystem =
    { config, lib, ... }:
    {
      # Flake apps (`nix run .#<name>`) backed by executables of the default package.
      apps =
        let
          inherit (config.packages) default;

          # App name -> executable name under `${default}/bin`.
          #
          # The two differ for `llama-embedding`: the package's postInstall only
          # renames `main` -> `llama` and `server` -> `llama-server`, so the
          # embedding tool is still installed as `bin/embedding`.
          binaries = {
            llama = "llama";
            llama-embedding = "embedding";
            llama-server = "llama-server";
            quantize = "quantize";
            train-text-from-scratch = "train-text-from-scratch";
          };

          # Build the app definition for one executable of the default package.
          mkApp = binary: {
            type = "app";
            program = "${default}/bin/${binary}";
          };
        in
        builtins.mapAttrs (_appName: mkApp) binaries;
    };
}
|
13
.devops/nix/devshells.nix
Normal file
13
.devops/nix/devshells.nix
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
{
  perSystem =
    { config, lib, ... }:
    let
      # The pair of development shells contributed by a single package:
      # `<name>` (from passthru.shell) and `<name>-extra` (from passthru.shell-extra).
      shellsOf = pkgName: drv: {
        "${pkgName}" = drv.passthru.shell;
        "${pkgName + "-extra"}" = drv.passthru.shell-extra;
      };
    in
    {
      # Merge the shells of every package exposed by this flake into one attrset.
      devShells = builtins.foldl' (merged: shells: merged // shells) { } (
        lib.mapAttrsToList shellsOf config.packages
      );
    };
}
|
32
.devops/nix/jetson-support.nix
Normal file
32
.devops/nix/jetson-support.nix
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
{ inputs, ... }:
{
  perSystem =
    {
      config,
      system,
      lib,
      pkgsCuda,
      ...
    }:
    let
      # Package name -> the single CUDA capability that package is built for.
      capabilityOf = {
        jetson-xavier = "7.2";
        jetson-orin = "8.7";
        jetson-nano = "5.3";
      };

      # A separate nixpkgs instance with CUDA enabled for exactly one
      # capability and forward compatibility switched off. The unfree
      # predicate is reused from `pkgsCuda`.
      nixpkgsForCapability =
        capability:
        import inputs.nixpkgs {
          inherit system;
          config = {
            cudaSupport = true;
            cudaCapabilities = [ capability ];
            cudaEnableForwardCompat = false;
            allowUnfreePredicate = pkgsCuda.config.allowUnfreePredicate;
          };
        };

      # llama-cpp taken from the scope instantiated on top of that nixpkgs instance.
      llamaCppFor =
        _packageName: capability:
        let
          scope = (nixpkgsForCapability capability).callPackage ./scope.nix { };
        in
        scope.llama-cpp;
    in
    # These packages are only defined when evaluating for aarch64-linux.
    lib.optionalAttrs (system == "aarch64-linux") {
      packages = builtins.mapAttrs llamaCppFor capabilityOf;
    };
}
|
35
.devops/nix/nixpkgs-instances.nix
Normal file
35
.devops/nix/nixpkgs-instances.nix
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
{ inputs, ... }:
{
  # Whatever is defined under `_module.args` is handed to modules as function
  # arguments. E.g. the module `{ pkgs, ... }: { /* config */ }` implicitly
  # receives `_module.args.pkgs` (defined in this case by flake-parts).
  perSystem =
    { system, ... }:
    let
      # Short names of the non-free licenses accepted in the CUDA instance.
      acceptedUnfreeLicenses = [
        "CUDA EULA"
        "cuDNN EULA"
      ];

      # A license passes if it is free or explicitly accepted above.
      licenseIsAcceptable =
        license: license.free || builtins.elem license.shortName acceptedUnfreeLicenses;

      # Use `meta.licenses` when present, otherwise wrap the single `meta.license`.
      licensesOf = p: p.meta.licenses or [ p.meta.license ];
    in
    {
      _module.args = {
        pkgsCuda = import inputs.nixpkgs {
          inherit system;
          config = {
            # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc,
            # and ucx are built with CUDA support)
            cudaSupport = true;
            # A package is allowed only if every one of its licenses is acceptable.
            allowUnfreePredicate = p: builtins.all licenseIsAcceptable (licensesOf p);
          };
        };

        # Ensure dependencies use ROCm consistently
        pkgsRocm = import inputs.nixpkgs {
          inherit system;
          config = {
            rocmSupport = true;
          };
        };
      };
    };
}
|
265
.devops/nix/package.nix
Normal file
265
.devops/nix/package.nix
Normal file
@ -0,0 +1,265 @@
|
|||||||
|
{
|
||||||
|
lib,
|
||||||
|
config,
|
||||||
|
stdenv,
|
||||||
|
mkShell,
|
||||||
|
cmake,
|
||||||
|
ninja,
|
||||||
|
pkg-config,
|
||||||
|
git,
|
||||||
|
python3,
|
||||||
|
mpi,
|
||||||
|
openblas, # TODO: Use the generic `blas` so users could switch between alternative implementations
|
||||||
|
cudaPackages,
|
||||||
|
darwin,
|
||||||
|
rocmPackages,
|
||||||
|
clblast,
|
||||||
|
useBlas ? builtins.all (x: !x) [
|
||||||
|
useCuda
|
||||||
|
useMetalKit
|
||||||
|
useOpenCL
|
||||||
|
useRocm
|
||||||
|
],
|
||||||
|
useCuda ? config.cudaSupport,
|
||||||
|
useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
|
||||||
|
useMpi ? false, # Increases the runtime closure size by ~700M
|
||||||
|
useOpenCL ? false,
|
||||||
|
useRocm ? config.rocmSupport,
|
||||||
|
llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
|
||||||
|
}@inputs:
|
||||||
|
|
||||||
|
let
|
||||||
|
inherit (lib)
|
||||||
|
cmakeBool
|
||||||
|
cmakeFeature
|
||||||
|
optionals
|
||||||
|
strings
|
||||||
|
versionOlder
|
||||||
|
;
|
||||||
|
|
||||||
|
# It's necessary to consistently use backendStdenv when building with CUDA support,
|
||||||
|
# otherwise we get libstdc++ errors downstream.
|
||||||
|
stdenv = throw "Use effectiveStdenv instead";
|
||||||
|
effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv;
|
||||||
|
|
||||||
|
suffices =
|
||||||
|
lib.optionals useBlas [ "BLAS" ]
|
||||||
|
++ lib.optionals useCuda [ "CUDA" ]
|
||||||
|
++ lib.optionals useMetalKit [ "MetalKit" ]
|
||||||
|
++ lib.optionals useMpi [ "MPI" ]
|
||||||
|
++ lib.optionals useOpenCL [ "OpenCL" ]
|
||||||
|
++ lib.optionals useRocm [ "ROCm" ];
|
||||||
|
|
||||||
|
pnameSuffix =
|
||||||
|
strings.optionalString (suffices != [ ])
|
||||||
|
"-${strings.concatMapStringsSep "-" strings.toLower suffices}";
|
||||||
|
descriptionSuffix =
|
||||||
|
strings.optionalString (suffices != [ ])
|
||||||
|
", accelerated with ${strings.concatStringsSep ", " suffices}";
|
||||||
|
|
||||||
|
# TODO: package the Python in this repository in a Nix-like way.
|
||||||
|
# It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
|
||||||
|
# is PEP 517-compatible, and ensure the correct .dist-info is generated.
|
||||||
|
# https://peps.python.org/pep-0517/
|
||||||
|
llama-python = python3.withPackages (
|
||||||
|
ps: [
|
||||||
|
ps.numpy
|
||||||
|
ps.sentencepiece
|
||||||
|
]
|
||||||
|
);
|
||||||
|
|
||||||
|
# TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
|
||||||
|
llama-python-extra = python3.withPackages (
|
||||||
|
ps: [
|
||||||
|
ps.numpy
|
||||||
|
ps.sentencepiece
|
||||||
|
ps.torchWithoutCuda
|
||||||
|
ps.transformers
|
||||||
|
]
|
||||||
|
);
|
||||||
|
|
||||||
|
# apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
|
||||||
|
# separately
|
||||||
|
darwinBuildInputs =
|
||||||
|
with darwin.apple_sdk.frameworks;
|
||||||
|
[
|
||||||
|
Accelerate
|
||||||
|
CoreVideo
|
||||||
|
CoreGraphics
|
||||||
|
]
|
||||||
|
++ optionals useMetalKit [ MetalKit ];
|
||||||
|
|
||||||
|
cudaBuildInputs = with cudaPackages; [
|
||||||
|
cuda_cccl.dev # <nv/target>
|
||||||
|
|
||||||
|
# A temporary hack for reducing the closure size, remove once cudaPackages
|
||||||
|
# have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
|
||||||
|
cuda_cudart.dev
|
||||||
|
cuda_cudart.lib
|
||||||
|
cuda_cudart.static
|
||||||
|
libcublas.dev
|
||||||
|
libcublas.lib
|
||||||
|
libcublas.static
|
||||||
|
];
|
||||||
|
|
||||||
|
rocmBuildInputs = with rocmPackages; [
|
||||||
|
clr
|
||||||
|
hipblas
|
||||||
|
rocblas
|
||||||
|
];
|
||||||
|
in
|
||||||
|
|
||||||
|
effectiveStdenv.mkDerivation (
|
||||||
|
finalAttrs: {
|
||||||
|
pname = "llama-cpp${pnameSuffix}";
|
||||||
|
version = llamaVersion;
|
||||||
|
|
||||||
|
src = lib.cleanSourceWith {
  # Keep changes to files that do not affect the build from changing the
  # source hash (and thus from triggering rebuilds).
  #
  # `name` is the absolute path of the candidate file or directory, so tests
  # about the file name itself must be made against its last path component;
  # comparing the whole path with "README.md" or checking it for a leading
  # "." can never match.
  filter =
    name: type:
    let
      baseName = baseNameOf (toString name);
    in
    !(builtins.any (excluded: excluded) [
      (lib.hasSuffix ".nix" baseName) # Ignore *.nix files when computing outPaths
      (baseName == "README.md") # Ignore README.md changes when computing outPaths
      (lib.hasPrefix "." baseName) # Skip hidden files and directories
    ]);
  src = lib.cleanSource ../../.;
};
|
||||||
|
|
||||||
|
postPatch = ''
|
||||||
|
substituteInPlace ./ggml-metal.m \
|
||||||
|
--replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
|
||||||
|
|
||||||
|
# TODO: Package up each Python script or service appropriately.
|
||||||
|
# If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`,
|
||||||
|
# we could make those *.py into setuptools' entrypoints
|
||||||
|
substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
|
||||||
|
'';
|
||||||
|
|
||||||
|
nativeBuildInputs =
|
||||||
|
[
|
||||||
|
cmake
|
||||||
|
ninja
|
||||||
|
pkg-config
|
||||||
|
git
|
||||||
|
]
|
||||||
|
++ optionals useCuda [
|
||||||
|
cudaPackages.cuda_nvcc
|
||||||
|
|
||||||
|
# TODO: Replace with autoAddDriverRunpath
|
||||||
|
# once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
|
||||||
|
cudaPackages.autoAddOpenGLRunpathHook
|
||||||
|
];
|
||||||
|
|
||||||
|
# Libraries linked into the build, selected by platform and by the use* flags.
buildInputs =
  optionals effectiveStdenv.isDarwin darwinBuildInputs
  # cmakeFlags requests LLAMA_BLAS with the OpenBLAS vendor whenever useBlas
  # is set, so the library has to be available to CMake.
  ++ optionals useBlas [ openblas ]
  ++ optionals useCuda cudaBuildInputs
  ++ optionals useMpi [ mpi ]
  ++ optionals useOpenCL [ clblast ]
  ++ optionals useRocm rocmBuildInputs;
|
||||||
|
|
||||||
|
# CMake configuration. Every backend toggle is passed explicitly (ON or OFF)
# so that the result never depends on CMake-side defaults.
cmakeFlags =
  [
    # Must stay off: tuning for the build machine's CPU would make the output
    # depend on the builder's hardware rather than on the declared inputs.
    (cmakeBool "LLAMA_NATIVE" false)
    (cmakeBool "LLAMA_BUILD_SERVER" true)
    (cmakeBool "BUILD_SHARED_LIBS" true)
    (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
    (cmakeBool "LLAMA_BLAS" useBlas)
    (cmakeBool "LLAMA_CLBLAST" useOpenCL)
    (cmakeBool "LLAMA_CUBLAS" useCuda)
    (cmakeBool "LLAMA_HIPBLAS" useRocm)
    (cmakeBool "LLAMA_METAL" useMetalKit)
    (cmakeBool "LLAMA_MPI" useMpi)
  ]
  ++ optionals useCuda [
    (
      # Build for the capabilities configured in cudaPackages.flags, written
      # without the dot and joined with ";".
      with cudaPackages.flags;
      cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
        builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
      )
    )
  ]
  ++ optionals useRocm [
    (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
    (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")

    # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
    # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
    # and select the line that matches the current nixpkgs version of rocBLAS.
    # Should likely use `rocmPackages.clr.gpuTargets`.
    "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
  ]
  ++ optionals useMetalKit [ (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ]
  ++ optionals useBlas [ (cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ];
|
||||||
|
|
||||||
|
# TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
|
||||||
|
# if they haven't been added yet.
|
||||||
|
postInstall = ''
|
||||||
|
mv $out/bin/main $out/bin/llama
|
||||||
|
mv $out/bin/server $out/bin/llama-server
|
||||||
|
mkdir -p $out/include
|
||||||
|
cp $src/llama.h $out/include/
|
||||||
|
'';
|
||||||
|
|
||||||
|
# Define the shells here; they take their build inputs from `finalAttrs.finalPackage` via `inputsFrom`.
|
||||||
|
passthru = {
|
||||||
|
inherit
|
||||||
|
useBlas
|
||||||
|
useCuda
|
||||||
|
useMetalKit
|
||||||
|
useMpi
|
||||||
|
useOpenCL
|
||||||
|
useRocm
|
||||||
|
;
|
||||||
|
|
||||||
|
shell = mkShell {
|
||||||
|
name = "shell-${finalAttrs.finalPackage.name}";
|
||||||
|
description = "contains numpy and sentencepiece";
|
||||||
|
buildInputs = [ llama-python ];
|
||||||
|
inputsFrom = [ finalAttrs.finalPackage ];
|
||||||
|
};
|
||||||
|
|
||||||
|
shell-extra = mkShell {
|
||||||
|
name = "shell-extra-${finalAttrs.finalPackage.name}";
|
||||||
|
description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
|
||||||
|
buildInputs = [ llama-python-extra ];
|
||||||
|
inputsFrom = [ finalAttrs.finalPackage ];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
meta = {
|
||||||
|
# Configurations we don't want even the CI to evaluate. Results in the
|
||||||
|
# "unsupported platform" messages. This is mostly a no-op, because
|
||||||
|
# cudaPackages would've refused to evaluate anyway.
|
||||||
|
badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;
|
||||||
|
|
||||||
|
# Configurations that are known to result in build failures. Can be
|
||||||
|
# overridden by importing Nixpkgs with `allowBroken = true`.
|
||||||
|
broken = (useMetalKit && !effectiveStdenv.isDarwin);
|
||||||
|
|
||||||
|
description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
|
||||||
|
homepage = "https://github.com/ggerganov/llama.cpp/";
|
||||||
|
license = lib.licenses.mit;
|
||||||
|
|
||||||
|
# Accommodates `nix run` and `lib.getExe`
|
||||||
|
mainProgram = "llama";
|
||||||
|
|
||||||
|
# These people might respond, on the best effort basis, if you ping them
|
||||||
|
# in case of Nix-specific regressions or for reviewing Nix-specific PRs.
|
||||||
|
# Consider adding yourself to this list if you want to ensure this flake
|
||||||
|
# stays maintained and you're willing to invest your time. Do not add
|
||||||
|
# other people without their consent. Consider removing people after
|
||||||
|
# they've been unreachable for long periods of time.
|
||||||
|
|
||||||
|
# Note that lib.maintainers is defined in Nixpkgs, but you may just add
|
||||||
|
# an attrset following the same format as in
|
||||||
|
# https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
|
||||||
|
maintainers = with lib.maintainers; [
|
||||||
|
philiptaron
|
||||||
|
SomeoneSerge
|
||||||
|
];
|
||||||
|
|
||||||
|
# Extend `badPlatforms` instead
|
||||||
|
platforms = lib.platforms.all;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
)
|
12
.devops/nix/scope.nix
Normal file
12
.devops/nix/scope.nix
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
{
  lib,
  newScope,
  llamaVersion ? "0.0.0",
}:

let
  # Contents of the scope. `self` is the scope itself, so anything called
  # through `self.callPackage` can take the other members (such as
  # `llamaVersion`) as arguments.
  members = self: {
    inherit llamaVersion;
    llama-cpp = self.callPackage ./package.nix { };
  };
in
lib.makeScope newScope members
|
23
.github/workflows/nix-flakestry.yml
vendored
Normal file
23
.github/workflows/nix-flakestry.yml
vendored
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# Publishes this flake to https://flakestry.dev so that it can be discovered there
name: "Publish a flake to flakestry"

on:
  # Runs when a tag matching one of the version patterns is pushed ...
  push:
    tags:
      - "v?[0-9]+.[0-9]+.[0-9]+"
      - "v?[0-9]+.[0-9]+"
  # ... or manually, for a tag that already exists
  workflow_dispatch:
    inputs:
      tag:
        description: "The existing tag to publish"
        type: "string"
        required: true

jobs:
  publish-flake:
    runs-on: ubuntu-latest
    permissions:
      id-token: "write"
      contents: "read"
    steps:
      - uses: flakestry/flakestry-publish@main
        with:
          # The manually supplied tag wins; otherwise use the pushed ref's name
          version: "${{ inputs.tag || github.ref_name }}"
|
55
flake.lock
55
flake.lock
@ -1,30 +1,30 @@
|
|||||||
{
|
{
|
||||||
"nodes": {
|
"nodes": {
|
||||||
"flake-utils": {
|
"flake-parts": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"systems": "systems"
|
"nixpkgs-lib": "nixpkgs-lib"
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1694529238,
|
"lastModified": 1701473968,
|
||||||
"narHash": "sha256-zsNZZGTGnMOf9YpHKJqMSsa0dXbfmxeoJ7xHlrt+xmY=",
|
"narHash": "sha256-YcVE5emp1qQ8ieHUnxt1wCZCC3ZfAS+SRRWZ2TMda7E=",
|
||||||
"owner": "numtide",
|
"owner": "hercules-ci",
|
||||||
"repo": "flake-utils",
|
"repo": "flake-parts",
|
||||||
"rev": "ff7b65b44d01cf9ba6a71320833626af21126384",
|
"rev": "34fed993f1674c8d06d58b37ce1e0fe5eebcb9f5",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
"owner": "numtide",
|
"owner": "hercules-ci",
|
||||||
"repo": "flake-utils",
|
"repo": "flake-parts",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nixpkgs": {
|
"nixpkgs": {
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1698318101,
|
"lastModified": 1703559957,
|
||||||
"narHash": "sha256-gUihHt3yPD7bVqg+k/UVHgngyaJ3DMEBchbymBMvK1E=",
|
"narHash": "sha256-x9PUuMEPGUOMB51zNxrDr2QoHbYWlCS2xhFedm9MC5Q=",
|
||||||
"owner": "NixOS",
|
"owner": "NixOS",
|
||||||
"repo": "nixpkgs",
|
"repo": "nixpkgs",
|
||||||
"rev": "63678e9f3d3afecfeafa0acead6239cdb447574c",
|
"rev": "75dd68c36f458c6593c5bbb48abfd3e59bfed380",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
@ -34,26 +34,29 @@
|
|||||||
"type": "github"
|
"type": "github"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"root": {
|
"nixpkgs-lib": {
|
||||||
"inputs": {
|
|
||||||
"flake-utils": "flake-utils",
|
|
||||||
"nixpkgs": "nixpkgs"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"systems": {
|
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1681028828,
|
"dir": "lib",
|
||||||
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
"lastModified": 1701253981,
|
||||||
"owner": "nix-systems",
|
"narHash": "sha256-ztaDIyZ7HrTAfEEUt9AtTDNoCYxUdSd6NrRHaYOIxtk=",
|
||||||
"repo": "default",
|
"owner": "NixOS",
|
||||||
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
"repo": "nixpkgs",
|
||||||
|
"rev": "e92039b55bcd58469325ded85d4f58dd5a4eaf58",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
"owner": "nix-systems",
|
"dir": "lib",
|
||||||
"repo": "default",
|
"owner": "NixOS",
|
||||||
|
"ref": "nixos-unstable",
|
||||||
|
"repo": "nixpkgs",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"root": {
|
||||||
|
"inputs": {
|
||||||
|
"flake-parts": "flake-parts",
|
||||||
|
"nixpkgs": "nixpkgs"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"root": "root",
|
"root": "root",
|
||||||
|
218
flake.nix
218
flake.nix
@ -1,139 +1,99 @@
|
|||||||
{
|
{
|
||||||
|
description = "Port of Facebook's LLaMA model in C/C++";
|
||||||
|
|
||||||
inputs = {
|
inputs = {
|
||||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
|
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
|
||||||
flake-utils.url = "github:numtide/flake-utils";
|
flake-parts.url = "github:hercules-ci/flake-parts";
|
||||||
};
|
};
|
||||||
outputs = { self, nixpkgs, flake-utils }:
|
|
||||||
flake-utils.lib.eachDefaultSystem (system:
|
# For inspection, use `nix flake show github:ggerganov/llama.cpp` or the nix repl:
|
||||||
|
#
|
||||||
|
# ```bash
|
||||||
|
# ❯ nix repl
|
||||||
|
# nix-repl> :lf github:ggerganov/llama.cpp
|
||||||
|
# Added 13 variables.
|
||||||
|
# nix-repl> outputs.apps.x86_64-linux.quantize
|
||||||
|
# { program = "/nix/store/00000000000000000000000000000000-llama.cpp/bin/quantize"; type = "app"; }
|
||||||
|
# ```
|
||||||
|
outputs =
|
||||||
|
{ self, flake-parts, ... }@inputs:
|
||||||
let
|
let
|
||||||
name = "llama.cpp";
|
# We could include the git revisions in the package names but those would
|
||||||
src = ./.;
|
# needlessly trigger rebuilds:
|
||||||
meta.mainProgram = "llama";
|
# llamaVersion = self.dirtyShortRev or self.shortRev;
|
||||||
inherit (pkgs.stdenv) isAarch32 isAarch64 isDarwin;
|
|
||||||
buildInputs = with pkgs; [ openmpi ];
|
# Nix already uses cryptographic hashes for versioning, so we'll just fix
|
||||||
osSpecific = with pkgs; buildInputs ++ (
|
# the fake semver for now:
|
||||||
if isAarch64 && isDarwin then
|
llamaVersion = "0.0.0";
|
||||||
with pkgs.darwin.apple_sdk_11_0.frameworks; [
|
|
||||||
Accelerate
|
|
||||||
MetalKit
|
|
||||||
]
|
|
||||||
else if isAarch32 && isDarwin then
|
|
||||||
with pkgs.darwin.apple_sdk.frameworks; [
|
|
||||||
Accelerate
|
|
||||||
CoreGraphics
|
|
||||||
CoreVideo
|
|
||||||
]
|
|
||||||
else if isDarwin then
|
|
||||||
with pkgs.darwin.apple_sdk.frameworks; [
|
|
||||||
Accelerate
|
|
||||||
CoreGraphics
|
|
||||||
CoreVideo
|
|
||||||
]
|
|
||||||
else
|
|
||||||
with pkgs; [ openblas ]
|
|
||||||
);
|
|
||||||
pkgs = import nixpkgs { inherit system; };
|
|
||||||
nativeBuildInputs = with pkgs; [ cmake ninja pkg-config ];
|
|
||||||
cudatoolkit_joined = with pkgs; symlinkJoin {
|
|
||||||
# HACK(Green-Sky): nix currently has issues with cmake findcudatoolkit
|
|
||||||
# see https://github.com/NixOS/nixpkgs/issues/224291
|
|
||||||
# copied from jaxlib
|
|
||||||
name = "${cudaPackages.cudatoolkit.name}-merged";
|
|
||||||
paths = [
|
|
||||||
cudaPackages.cudatoolkit.lib
|
|
||||||
cudaPackages.cudatoolkit.out
|
|
||||||
] ++ lib.optionals (lib.versionOlder cudaPackages.cudatoolkit.version "11") [
|
|
||||||
# for some reason some of the required libs are in the targets/x86_64-linux
|
|
||||||
# directory; not sure why but this works around it
|
|
||||||
"${cudaPackages.cudatoolkit}/targets/${system}"
|
|
||||||
];
|
|
||||||
};
|
|
||||||
llama-python =
|
|
||||||
pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece ]);
|
|
||||||
# TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
|
|
||||||
llama-python-extra =
|
|
||||||
pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece torchWithoutCuda transformers ]);
|
|
||||||
postPatch = ''
|
|
||||||
substituteInPlace ./ggml-metal.m \
|
|
||||||
--replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
|
|
||||||
substituteInPlace ./*.py --replace '/usr/bin/env python' '${llama-python}/bin/python'
|
|
||||||
'';
|
|
||||||
postInstall = ''
|
|
||||||
mv $out/bin/main $out/bin/llama
|
|
||||||
mv $out/bin/server $out/bin/llama-server
|
|
||||||
mkdir -p $out/include
|
|
||||||
cp ${src}/llama.h $out/include/
|
|
||||||
'';
|
|
||||||
cmakeFlags = [ "-DLLAMA_NATIVE=OFF" "-DLLAMA_BUILD_SERVER=ON" "-DBUILD_SHARED_LIBS=ON" "-DCMAKE_SKIP_BUILD_RPATH=ON" ];
|
|
||||||
in
|
in
|
||||||
|
flake-parts.lib.mkFlake { inherit inputs; }
|
||||||
|
|
||||||
{
|
{
|
||||||
packages.default = pkgs.stdenv.mkDerivation {
|
|
||||||
inherit name src meta postPatch nativeBuildInputs postInstall;
|
imports = [
|
||||||
buildInputs = osSpecific;
|
.devops/nix/nixpkgs-instances.nix
|
||||||
cmakeFlags = cmakeFlags
|
.devops/nix/apps.nix
|
||||||
++ (if isAarch64 && isDarwin then [
|
.devops/nix/devshells.nix
|
||||||
"-DCMAKE_C_FLAGS=-D__ARM_FEATURE_DOTPROD=1"
|
.devops/nix/jetson-support.nix
|
||||||
"-DLLAMA_METAL=ON"
|
|
||||||
] else [
|
|
||||||
"-DLLAMA_BLAS=ON"
|
|
||||||
"-DLLAMA_BLAS_VENDOR=OpenBLAS"
|
|
||||||
]);
|
|
||||||
};
|
|
||||||
packages.opencl = pkgs.stdenv.mkDerivation {
|
|
||||||
inherit name src meta postPatch nativeBuildInputs postInstall;
|
|
||||||
buildInputs = with pkgs; buildInputs ++ [ clblast ];
|
|
||||||
cmakeFlags = cmakeFlags ++ [
|
|
||||||
"-DLLAMA_CLBLAST=ON"
|
|
||||||
];
|
];
|
||||||
};
|
|
||||||
packages.cuda = pkgs.stdenv.mkDerivation {
|
# An overlay can be used to have a more granular control over llama-cpp's
|
||||||
inherit name src meta postPatch nativeBuildInputs postInstall;
|
# dependencies and configuration, than that offered by the `.override`
|
||||||
buildInputs = with pkgs; buildInputs ++ [ cudatoolkit_joined ];
|
# mechanism. Cf. https://nixos.org/manual/nixpkgs/stable/#chap-overlays.
|
||||||
cmakeFlags = cmakeFlags ++ [
|
#
|
||||||
"-DLLAMA_CUBLAS=ON"
|
# E.g. in a flake:
|
||||||
];
|
# ```
|
||||||
};
|
# { nixpkgs, llama-cpp, ... }:
|
||||||
packages.rocm = pkgs.stdenv.mkDerivation {
|
# let pkgs = import nixpkgs {
|
||||||
inherit name src meta postPatch nativeBuildInputs postInstall;
|
# overlays = [ (llama-cpp.overlays.default) ];
|
||||||
buildInputs = with pkgs.rocmPackages; buildInputs ++ [ clr hipblas rocblas ];
|
# system = "aarch64-linux";
|
||||||
cmakeFlags = cmakeFlags ++ [
|
# config.allowUnfree = true;
|
||||||
"-DLLAMA_HIPBLAS=1"
|
# config.cudaSupport = true;
|
||||||
"-DCMAKE_C_COMPILER=hipcc"
|
# config.cudaCapabilities = [ "7.2" ];
|
||||||
"-DCMAKE_CXX_COMPILER=hipcc"
|
# config.cudaEnableForwardCompat = false;
|
||||||
# Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
|
# }; in {
|
||||||
# in github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
|
# packages.aarch64-linux.llamaJetsonXavier = pkgs.llamaPackages.llama-cpp;
|
||||||
# and select the line that matches the current nixpkgs version of rocBLAS.
|
# }
|
||||||
"-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
|
# ```
|
||||||
];
|
#
|
||||||
};
|
# Cf. https://nixos.org/manual/nix/unstable/command-ref/new-cli/nix3-flake.html?highlight=flake#flake-format
|
||||||
apps.llama-server = {
|
flake.overlays.default =
|
||||||
type = "app";
|
(final: prev: {
|
||||||
program = "${self.packages.${system}.default}/bin/llama-server";
|
llamaPackages = final.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
|
||||||
};
|
inherit (final.llamaPackages) llama-cpp;
|
||||||
apps.llama-embedding = {
|
|
||||||
type = "app";
|
|
||||||
program = "${self.packages.${system}.default}/bin/embedding";
|
|
||||||
};
|
|
||||||
apps.llama = {
|
|
||||||
type = "app";
|
|
||||||
program = "${self.packages.${system}.default}/bin/llama";
|
|
||||||
};
|
|
||||||
apps.quantize = {
|
|
||||||
type = "app";
|
|
||||||
program = "${self.packages.${system}.default}/bin/quantize";
|
|
||||||
};
|
|
||||||
apps.train-text-from-scratch = {
|
|
||||||
type = "app";
|
|
||||||
program = "${self.packages.${system}.default}/bin/train-text-from-scratch";
|
|
||||||
};
|
|
||||||
apps.default = self.apps.${system}.llama;
|
|
||||||
devShells.default = pkgs.mkShell {
|
|
||||||
buildInputs = [ llama-python ];
|
|
||||||
packages = nativeBuildInputs ++ osSpecific;
|
|
||||||
};
|
|
||||||
devShells.extra = pkgs.mkShell {
|
|
||||||
buildInputs = [ llama-python-extra ];
|
|
||||||
packages = nativeBuildInputs ++ osSpecific;
|
|
||||||
};
|
|
||||||
});
|
});
|
||||||
|
|
||||||
|
systems = [
|
||||||
|
"aarch64-darwin"
|
||||||
|
"aarch64-linux"
|
||||||
|
"x86_64-darwin" # x86_64-darwin isn't tested (and likely isn't relevant)
|
||||||
|
"x86_64-linux"
|
||||||
|
];
|
||||||
|
|
||||||
|
perSystem =
|
||||||
|
{
|
||||||
|
config,
|
||||||
|
lib,
|
||||||
|
pkgs,
|
||||||
|
pkgsCuda,
|
||||||
|
pkgsRocm,
|
||||||
|
...
|
||||||
|
}:
|
||||||
|
{
|
||||||
|
# We don't use the overlay here so as to avoid making too many instances of nixpkgs,
|
||||||
|
# cf. https://zimbatm.com/notes/1000-instances-of-nixpkgs
|
||||||
|
packages =
|
||||||
|
{
|
||||||
|
default = (pkgs.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp;
|
||||||
|
}
|
||||||
|
// lib.optionalAttrs pkgs.stdenv.isLinux {
|
||||||
|
opencl = config.packages.default.override { useOpenCL = true; };
|
||||||
|
cuda = (pkgsCuda.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp;
|
||||||
|
rocm = (pkgsRocm.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp;
|
||||||
|
|
||||||
|
mpi-cpu = config.packages.default.override { useMpi = true; };
|
||||||
|
# MPI on top of the CUDA build (overriding `default` here would merely duplicate `mpi-cpu`)
mpi-cuda = config.packages.cuda.override { useMpi = true; };
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user