Mirror of https://github.com/ggerganov/llama.cpp.git, synced 2024-12-24 10:24:35 +00:00.
build(nix): Package gguf-py (#5664)
* style: format with nixfmt/rfc101-style
* build(nix): Package gguf-py
* build(nix): Refactor to new scope for gguf-py
* build(nix): Exclude gguf-py from devShells
* build(nix): Refactor gguf-py derivation to take in exact deps
* build(nix): Enable pytestCheckHook and pythonImportsCheck for gguf-py
* build(python): Package python scripts with pyproject.toml
* chore: Cleanup
* dev(nix): Break up python/C devShells
* build(python): Relax pytorch version constraint (Nix has an older version)
* chore: Move cmake to nativeBuildInputs for devShell
* fmt: Reconcile formatting with rebase
* style: nix fmt
* cleanup: Remove unnecessary __init__.py
* chore: Suggestions from review
  - Filter out non-source files from llama-scripts flake derivation
  - Clean up unused closure
  - Remove scripts devShell
* revert: Bad changes
* dev: Simplify devShells, restore the -extra devShell
* build(nix): Add pyyaml for gguf-py
* chore: Remove some unused bindings
* dev: Add tiktoken to -extra devShells
parent c6d4cb4655
commit 9c1ba55733
.devops/nix/devshells.nix
@@ -1,13 +1,52 @@
 { inputs, ... }:
 {
   perSystem =
-    { config, lib, ... }:
+    {
+      config,
+      lib,
+      system,
+      ...
+    }:
     {
       devShells =
-        lib.concatMapAttrs
-          (name: package: {
-            ${name} = package.passthru.shell;
-            ${name + "-extra"} = package.passthru.shell-extra;
-          })
-          config.packages;
+        let
+          pkgs = import inputs.nixpkgs { inherit system; };
+          stdenv = pkgs.stdenv;
+          scripts = config.packages.python-scripts;
+        in
+        lib.pipe (config.packages) [
+          (lib.concatMapAttrs (
+            name: package: {
+              ${name} = pkgs.mkShell {
+                name = "${name}";
+                inputsFrom = [ package ];
+                shellHook = ''
+                  echo "Entering ${name} devShell"
+                '';
+              };
+              "${name}-extra" =
+                if (name == "python-scripts") then
+                  null
+                else
+                  pkgs.mkShell {
+                    name = "${name}-extra";
+                    inputsFrom = [
+                      package
+                      scripts
+                    ];
+                    # Extra packages that *may* be used by some scripts
+                    packages = [
+                      pkgs.python3Packages.tiktoken
+                    ];
+                    shellHook = ''
+                      echo "Entering ${name} devShell"
+                      addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
+                    '';
+                  };
+            }
+          ))
+          (lib.filterAttrs (name: value: value != null))
+        ];
     };
 }
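The pipe above fans every package in `config.packages` out into a `<name>` devShell and a `<name>-extra` devShell, then drops the `null` produced for `python-scripts` (which gets no `-extra` variant). A standalone sketch of that shape, using only nixpkgs `lib`; the attribute values are placeholder strings, not real shells:

# evaluate with e.g.: nix-instantiate --eval --strict sketch.nix
let
  lib = (import <nixpkgs> { }).lib;
  packages = {
    default = "llama-cpp";
    python-scripts = "scripts";
  };
in
lib.pipe packages [
  # fan each package out to "<name>" and "<name>-extra"
  (lib.concatMapAttrs (
    name: _: {
      ${name} = "devShell";
      "${name}-extra" = if name == "python-scripts" then null else "extra devShell";
    }
  ))
  # drop the null entry, as the filterAttrs step above does
  (lib.filterAttrs (_: v: v != null))
]
# => { default = "devShell"; default-extra = "extra devShell"; python-scripts = "devShell"; }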
.devops/nix/nixpkgs-instances.nix
@@ -26,16 +26,14 @@
       config.cudaSupport = true;
       config.allowUnfreePredicate =
         p:
-        builtins.all
-          (
-            license:
-            license.free
-            || builtins.elem license.shortName [
-              "CUDA EULA"
-              "cuDNN EULA"
-            ]
-          )
-          (p.meta.licenses or [ p.meta.license ]);
+        builtins.all (
+          license:
+          license.free
+          || builtins.elem license.shortName [
+            "CUDA EULA"
+            "cuDNN EULA"
+          ]
+        ) (p.meta.licenses or [ p.meta.license ]);
     };
     # Ensure dependencies use ROCm consistently
     pkgsRocm = import inputs.nixpkgs {
.devops/nix/package-gguf-py.nix (new file, 36 lines)
@@ -0,0 +1,36 @@
+{
+  lib,
+  llamaVersion,
+  numpy,
+  tqdm,
+  sentencepiece,
+  pyyaml,
+  poetry-core,
+  buildPythonPackage,
+  pytestCheckHook,
+}:
+
+buildPythonPackage {
+  pname = "gguf";
+  version = llamaVersion;
+  pyproject = true;
+  nativeBuildInputs = [ poetry-core ];
+  propagatedBuildInputs = [
+    numpy
+    tqdm
+    sentencepiece
+    pyyaml
+  ];
+  src = lib.cleanSource ../../gguf-py;
+  pythonImportsCheck = [
+    "numpy"
+    "gguf"
+  ];
+  nativeCheckInputs = [ pytestCheckHook ];
+  doCheck = true;
+  meta = with lib; {
+    description = "Python package for writing binary files in the GGUF format";
+    license = licenses.mit;
+    maintainers = [ maintainers.ditsuke ];
+  };
+}
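Because gguf-py is now an ordinary `buildPythonPackage`, a downstream flake can drop it into a Python environment. A hedged sketch: the input name `llama-cpp-flake` is an assumption, while the attribute path follows the `legacyPackages` wiring in the flake.nix hunk further down (the environment's Python should match the one the scope was instantiated with):

# hypothetical consumer fragment
{ pkgs, llama-cpp-flake }:
pkgs.python3.withPackages (ps: [
  # the gguf derivation defined above, taken from the llamaPackages scope
  llama-cpp-flake.legacyPackages.${pkgs.system}.llamaPackages.gguf-py
])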
.devops/nix/package.nix
@@ -3,13 +3,11 @@
   glibc,
   config,
   stdenv,
-  mkShell,
   runCommand,
   cmake,
   ninja,
   pkg-config,
   git,
-  python3,
   mpi,
   blas,
   cudaPackages,
@@ -20,15 +18,18 @@
   vulkan-loader,
   curl,
   shaderc,
-  useBlas ? builtins.all (x: !x) [
-    useCuda
-    useMetalKit
-    useRocm
-    useVulkan
-  ] && blas.meta.available,
+  useBlas ?
+    builtins.all (x: !x) [
+      useCuda
+      useMetalKit
+      useRocm
+      useVulkan
+    ]
+    && blas.meta.available,
   useCuda ? config.cudaSupport,
   useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
-  useMpi ? false, # Increases the runtime closure size by ~700M
+  # Increases the runtime closure size by ~700M
+  useMpi ? false,
   useRocm ? config.rocmSupport,
   enableCurl ? true,
   useVulkan ? false,
@@ -38,8 +39,8 @@
   # otherwise we get libstdc++ errors downstream.
   effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
   enableStatic ? effectiveStdenv.hostPlatform.isStatic,
-  precompileMetalShaders ? false
-}@inputs:
+  precompileMetalShaders ? false,
+}:

 let
   inherit (lib)
@@ -47,7 +48,6 @@ let
     cmakeFeature
     optionals
     strings
-    versionOlder
     ;

   stdenv = throw "Use effectiveStdenv instead";
@@ -63,54 +63,11 @@ let
   pnameSuffix =
     strings.optionalString (suffices != [ ])
       "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
-  descriptionSuffix =
-    strings.optionalString (suffices != [ ])
-      ", accelerated with ${strings.concatStringsSep ", " suffices}";
-
-  executableSuffix = effectiveStdenv.hostPlatform.extensions.executable;
-
-  # TODO: package the Python in this repository in a Nix-like way.
-  # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
-  # is PEP 517-compatible, and ensure the correct .dist-info is generated.
-  # https://peps.python.org/pep-0517/
-  #
-  # TODO: Package up each Python script or service appropriately, by making
-  # them into "entrypoints"
-  llama-python = python3.withPackages (
-    ps: [
-      ps.numpy
-      ps.sentencepiece
-    ]
-  );
-
-  # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
-  llama-python-extra = python3.withPackages (
-    ps: [
-      ps.numpy
-      ps.sentencepiece
-      ps.tiktoken
-      ps.torchWithoutCuda
-      ps.transformers
-
-      # server bench
-      ps.matplotlib
-
-      # server tests
-      ps.openai
-      ps.behave
-      ps.prometheus-client
-
-      # for examples/pydantic-models-to-grammar-examples.py
-      ps.docstring-parser
-      ps.pydantic
-
-      # for scripts/compare-llama-bench.py
-      ps.gitpython
-      ps.tabulate
-    ]
-  );
-
-  xcrunHost = runCommand "xcrunHost" {} ''
+  descriptionSuffix = strings.optionalString (
+    suffices != [ ]
+  ) ", accelerated with ${strings.concatStringsSep ", " suffices}";
+
+  xcrunHost = runCommand "xcrunHost" { } ''
     mkdir -p $out/bin
     ln -s /usr/bin/xcrun $out/bin
   '';
@@ -145,178 +102,145 @@ let
   ];
 in

-effectiveStdenv.mkDerivation (
-  finalAttrs: {
+effectiveStdenv.mkDerivation (finalAttrs: {
   pname = "llama-cpp${pnameSuffix}";
   version = llamaVersion;

   # Note: none of the files discarded here are visible in the sandbox or
   # affect the output hash. This also means they can be modified without
   # triggering a rebuild.
   src = lib.cleanSourceWith {
     filter =
       name: type:
       let
         noneOf = builtins.all (x: !x);
         baseName = baseNameOf name;
       in
       noneOf [
         (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
         (lib.hasSuffix ".md" name) # Ignore *.md changes when computing outPaths
         (lib.hasPrefix "." baseName) # Skip hidden files and directories
         (baseName == "flake.lock")
       ];
     src = lib.cleanSource ../../.;
   };

   postPatch = ''
     substituteInPlace ./ggml/src/ggml-metal.m \
       --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
     substituteInPlace ./ggml/src/ggml-metal.m \
       --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
   '';

   # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
   # `default.metallib` may be compiled with Metal compiler from XCode
   # and we need to escape sandbox on MacOS to access Metal compiler.
   # `xcrun` is used to find the path of the Metal compiler, which is variable
   # and not on $PATH
   # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
   __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;

   nativeBuildInputs =
     [
       cmake
       ninja
       pkg-config
       git
     ]
     ++ optionals useCuda [
       cudaPackages.cuda_nvcc
+
       autoAddDriverRunpath
     ]
-    ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [
-      glibc.static
-    ] ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [
-      xcrunHost
-    ];
+    ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
+    ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];

   buildInputs =
     optionals effectiveStdenv.isDarwin darwinBuildInputs
     ++ optionals useCuda cudaBuildInputs
     ++ optionals useMpi [ mpi ]
     ++ optionals useRocm rocmBuildInputs
     ++ optionals useBlas [ blas ]
     ++ optionals useVulkan vulkanBuildInputs
     ++ optionals enableCurl [ curl ];

   cmakeFlags =
     [
       (cmakeBool "LLAMA_BUILD_SERVER" true)
       (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
       (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
       (cmakeBool "LLAMA_CURL" enableCurl)
       (cmakeBool "GGML_NATIVE" false)
       (cmakeBool "GGML_BLAS" useBlas)
       (cmakeBool "GGML_CUDA" useCuda)
       (cmakeBool "GGML_HIPBLAS" useRocm)
       (cmakeBool "GGML_METAL" useMetalKit)
       (cmakeBool "GGML_VULKAN" useVulkan)
       (cmakeBool "GGML_STATIC" enableStatic)
     ]
     ++ optionals useCuda [
       (
         with cudaPackages.flags;
         cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
           builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
         )
       )
     ]
     ++ optionals useRocm [
       (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
       (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
     ]
     ++ optionals useMetalKit [
       (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
       (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
     ];

   # Environment variables needed for ROCm
   env = optionals useRocm {
     ROCM_PATH = "${rocmPackages.clr}";
     HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
   };

   # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
   # if they haven't been added yet.
   postInstall = ''
     mkdir -p $out/include
     cp $src/include/llama.h $out/include/
   '';

-  # Define the shells here, but don't add in the inputsFrom to avoid recursion.
-  passthru = {
-    inherit
-      useBlas
-      useCuda
-      useMetalKit
-      useMpi
-      useRocm
-      useVulkan
-      ;
-
-    shell = mkShell {
-      name = "shell-${finalAttrs.finalPackage.name}";
-      description = "contains numpy and sentencepiece";
-      buildInputs = [ llama-python ];
-      inputsFrom = [ finalAttrs.finalPackage ];
-      shellHook = ''
-        addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
-      '';
-    };
-
-    shell-extra = mkShell {
-      name = "shell-extra-${finalAttrs.finalPackage.name}";
-      description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
-      buildInputs = [ llama-python-extra ];
-      inputsFrom = [ finalAttrs.finalPackage ];
-    };
-  };
-
   meta = {
     # Configurations we don't want even the CI to evaluate. Results in the
     # "unsupported platform" messages. This is mostly a no-op, because
     # cudaPackages would've refused to evaluate anyway.
     badPlatforms = optionals useCuda lib.platforms.darwin;

     # Configurations that are known to result in build failures. Can be
     # overridden by importing Nixpkgs with `allowBroken = true`.
     broken = (useMetalKit && !effectiveStdenv.isDarwin);

     description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
     homepage = "https://github.com/ggerganov/llama.cpp/";
     license = lib.licenses.mit;

     # Accommodates `nix run` and `lib.getExe`
     mainProgram = "llama-cli";

     # These people might respond, on the best effort basis, if you ping them
     # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
     # Consider adding yourself to this list if you want to ensure this flake
     # stays maintained and you're willing to invest your time. Do not add
     # other people without their consent. Consider removing people after
     # they've been unreachable for long periods of time.

     # Note that lib.maintainers is defined in Nixpkgs, but you may just add
     # an attrset following the same format as in
     # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
     maintainers = with lib.maintainers; [
       philiptaron
       SomeoneSerge
     ];

     # Extend `badPlatforms` instead
     platforms = lib.platforms.all;
   };
-  }
-)
+})
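All of the feature toggles in the argument set above are plain function arguments, so build variants come from `.override` — the same mechanism flake.nix itself uses for its `vulkan` package (`config.packages.default.override { useVulkan = true; }`). A sketch, assuming `llamaPackages` is the scope built by scope.nix:

llamaPackages.llama-cpp.override {
  useVulkan = true;
  enableCurl = false; # any toggle from the argument set can be flipped this way
}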
.devops/nix/python-scripts.nix (new file, 66 lines)
@@ -0,0 +1,66 @@
+{
+  lib,
+  stdenv,
+  buildPythonPackage,
+  poetry-core,
+  mkShell,
+  python3Packages,
+  gguf-py,
+}@inputs:
+
+let
+  llama-python-deps = with python3Packages; [
+    numpy
+    sentencepiece
+    transformers
+    protobuf
+    torchWithoutCuda
+    gguf-py
+    tqdm
+
+    # for scripts/compare-llama-bench.py
+    gitpython
+    tabulate
+
+    # for examples/pydantic-models-to-grammar-examples.py
+    docstring-parser
+    pydantic
+
+  ];
+
+  llama-python-test-deps = with python3Packages; [
+    # Server bench
+    matplotlib
+
+    # server tests
+    openai
+    behave
+    prometheus-client
+  ];
+in
+
+buildPythonPackage ({
+  pname = "llama-scripts";
+  version = "0.0.0";
+  pyproject = true;
+
+  # NOTE: The files filtered out here are not visible in the build sandbox, nor
+  # do they affect the output hash. They can be modified without triggering a rebuild.
+  src = lib.cleanSourceWith {
+    filter =
+      name: type:
+      let
+        any = builtins.any (x: x);
+        baseName = builtins.baseNameOf name;
+      in
+      any [
+        (lib.hasSuffix ".py" name)
+        (baseName == "README.md")
+        (baseName == "pyproject.toml")
+      ];
+    src = lib.cleanSource ../../.;
+  };
+  nativeBuildInputs = [ poetry-core ];
+  nativeCheckInputs = llama-python-test-deps;
+  dependencies = llama-python-deps;
+})
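The `cleanSourceWith` filter above is a whitelist: only `*.py` files plus the named metadata files reach the build sandbox. A standalone sketch of the predicate (the sample file names are illustrative):

# evaluate with e.g.: nix-instantiate --eval --strict sketch.nix
let
  lib = (import <nixpkgs> { }).lib;
  keep =
    name:
    builtins.any (x: x) [
      (lib.hasSuffix ".py" name)
      (builtins.baseNameOf name == "README.md")
      (builtins.baseNameOf name == "pyproject.toml")
    ];
in
map keep [
  "scripts/compare-llama-bench.py" # true
  "README.md" # true
  "CMakeLists.txt" # false
]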
.devops/nix/scope.nix
@@ -1,19 +1,41 @@
 {
   lib,
   newScope,
+  python3,
   llamaVersion ? "0.0.0",
 }:

+let
+  pythonPackages = python3.pkgs;
+  buildPythonPackage = pythonPackages.buildPythonPackage;
+  numpy = pythonPackages.numpy;
+  tqdm = pythonPackages.tqdm;
+  sentencepiece = pythonPackages.sentencepiece;
+  pyyaml = pythonPackages.pyyaml;
+  poetry-core = pythonPackages.poetry-core;
+  pytestCheckHook = pythonPackages.pytestCheckHook;
+in
+
 # We're using `makeScope` instead of just writing out an attrset
 # because it allows users to apply overlays later using `overrideScope'`.
 # Cf. https://noogle.dev/f/lib/makeScope

-lib.makeScope newScope (
-  self: {
-    inherit llamaVersion;
-    llama-cpp = self.callPackage ./package.nix { };
-    docker = self.callPackage ./docker.nix { };
-    docker-min = self.callPackage ./docker.nix { interactive = false; };
-    sif = self.callPackage ./sif.nix { };
-  }
-)
+lib.makeScope newScope (self: {
+  inherit llamaVersion;
+  gguf-py = self.callPackage ./package-gguf-py.nix {
+    inherit
+      buildPythonPackage
+      numpy
+      tqdm
+      sentencepiece
+      poetry-core
+      pyyaml
+      pytestCheckHook
+      ;
+  };
+  python-scripts = self.callPackage ./python-scripts.nix { inherit buildPythonPackage poetry-core; };
+  llama-cpp = self.callPackage ./package.nix { };
+  docker = self.callPackage ./docker.nix { };
+  docker-min = self.callPackage ./docker.nix { interactive = false; };
+  sif = self.callPackage ./sif.nix { };
+})
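The `makeScope` comment above is where this refactor pays off: a user can swap out one member of the scope and let everything that depends on it follow. A sketch, assuming `llamaPackages` was obtained as in the flake.nix hunk below:

llamaPackages.overrideScope' (
  final: prev: {
    gguf-py = prev.gguf-py.overridePythonAttrs (old: {
      doCheck = false; # e.g. skip the pytest run while iterating locally
    });
  }
)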
flake.nix
@@ -145,7 +145,9 @@
         # the same path you would with an overlay.
         legacyPackages = {
           llamaPackages = pkgs.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
-          llamaPackagesWindows = pkgs.pkgsCross.mingwW64.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
+          llamaPackagesWindows = pkgs.pkgsCross.mingwW64.callPackage .devops/nix/scope.nix {
+            inherit llamaVersion;
+          };
           llamaPackagesCuda = pkgsCuda.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
           llamaPackagesRocm = pkgsRocm.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
         };
@@ -157,6 +159,7 @@
           default = config.legacyPackages.llamaPackages.llama-cpp;
           vulkan = config.packages.default.override { useVulkan = true; };
           windows = config.legacyPackages.llamaPackagesWindows.llama-cpp;
+          python-scripts = config.legacyPackages.llamaPackages.python-scripts;
         }
         // lib.optionalAttrs pkgs.stdenv.isLinux {
           cuda = config.legacyPackages.llamaPackagesCuda.llama-cpp;
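With `python-scripts` exposed here, a plain `nix build .#python-scripts` should produce the scripts bundle, and the devshells.nix logic above gives it a matching `nix develop .#python-scripts` shell (exact attribute paths depend on the flake-parts wiring, so treat these invocations as illustrative).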
gguf-py/pyproject.toml
@@ -23,6 +23,7 @@ python = ">=3.8"
 numpy = ">=1.17"
 tqdm = ">=4.27"
 pyyaml = ">=5.1"
+sentencepiece = ">=0.1.98,<=0.2.0"

 [tool.poetry.dev-dependencies]
 pytest = "^5.2"
pyproject.toml
@@ -17,7 +17,7 @@ classifiers = [
 [tool.poetry.dependencies]
 python = ">=3.9"
 numpy = "^1.25.0"
-sentencepiece = ">=0.1.98,<0.2.0"
+sentencepiece = ">=0.1.98,<=0.2.0"
 transformers = ">=4.35.2,<5.0.0"
 protobuf = ">=4.21.0,<5.0.0"
 gguf = { path = "./gguf-py" }
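The two pyproject files now agree on sentencepiece: relaxing the root project's upper bound from `<0.2.0` to `<=0.2.0` additionally admits exactly the 0.2.0 release, matching the constraint added to gguf-py/pyproject.toml above.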