build(nix): Package gguf-py (#5664)

* style: format with nixfmt/rfc101-style * build(nix): Package gguf-py * build(nix): Refactor to new scope for gguf-py * build(nix): Exclude gguf-py from devShells * build(nix): Refactor gguf-py derivation to take in exact deps * build(nix): Enable pytestCheckHook and pythonImportsCheck for gguf-py * build(python): Package python scripts with pyproject.toml * chore: Cleanup * dev(nix): Break up python/C devShells * build(python): Relax pytorch version constraint Nix has an older version * chore: Move cmake to nativeBuildInputs for devShell * fmt: Reconcile formatting with rebase * style: nix fmt * cleanup: Remove unnecessary __init__.py * chore: Suggestions from review - Filter out non-source files from llama-scripts flake derivation - Clean up unused closure - Remove scripts devShell * revert: Bad changes * dev: Simplify devShells, restore the -extra devShell * build(nix): Add pyyaml for gguf-py * chore: Remove some unused bindings * dev: Add tiktoken to -extra devShells
2024-12-25 02:44:36 +00:00 · 2024-09-02 16:51:01 +05:30 · 2024-09-02 16:51:01 +05:30 · 9c1ba55733
commit 9c1ba55733
parent c6d4cb4655
9 changed files with 337 additions and 248 deletions
--- a/.devops/nix/devshells.nix
+++ b/.devops/nix/devshells.nix
@ -1,13 +1,52 @@
 { inputs, ... }:
 {
  perSystem =
-    { config, lib, ... }:
+    {
      config,
      lib,
      system,
      ...
    }:
    {
      devShells =
-        lib.concatMapAttrs
+        let
-          (name: package: {
+          pkgs = import inputs.nixpkgs { inherit system; };
-            ${name} = package.passthru.shell;
+          stdenv = pkgs.stdenv;
-            ${name + "-extra"} = package.passthru.shell-extra;
+          scripts = config.packages.python-scripts;
-          })
+        in
-          config.packages;
+        lib.pipe (config.packages) [
          # For every flake package emit two dev shells: `<name>` and `<name>-extra`.
          # The `-extra` entry is `null` for `python-scripts`; null entries are
          # removed by the `lib.filterAttrs` stage that follows in this pipe.
          (lib.concatMapAttrs (
            name: package: {
              # Plain shell: only the build inputs of the package itself.
              ${name} = pkgs.mkShell {
                inherit name;
                inputsFrom = [ package ];
                shellHook = ''
                  echo "Entering ${name} devShell"
                '';
              };
              # Extended shell: additionally pulls in the inputs of the
              # python-scripts package (`scripts`) plus optional extras.
              "${name}-extra" =
                if (name == "python-scripts") then
                  null
                else
                  pkgs.mkShell {
                    name = "${name}-extra";
                    inputsFrom = [
                      package
                      scripts
                    ];
                    # Extra packages that *may* be used by some scripts
                    packages = [
                      pkgs.python3Packages.tiktoken
                    ];
                    # Announce the shell under its own (`-extra`) name and put the
                    # compiler's runtime library directory on LD_LIBRARY_PATH.
                    shellHook = ''
                      echo "Entering ${name}-extra devShell"
                      addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
                    '';
                  };
            }
          ))
          (lib.filterAttrs (name: value: value != null))
        ];
    };
 }
--- a/.devops/nix/nixpkgs-instances.nix
+++ b/.devops/nix/nixpkgs-instances.nix
@ -26,16 +26,14 @@
          config.cudaSupport = true;
          config.allowUnfreePredicate =
            p:
-            builtins.all
+            builtins.all (
-              (
+              license:
-                license:
+              license.free
-                license.free
+              || builtins.elem license.shortName [
-                || builtins.elem license.shortName [
+                "CUDA EULA"
-                  "CUDA EULA"
+                "cuDNN EULA"
-                  "cuDNN EULA"
+              ]
-                ]
+            ) (p.meta.licenses or [ p.meta.license ]);
              )
              (p.meta.licenses or [ p.meta.license ]);
        };
        # Ensure dependencies use ROCm consistently
        pkgsRocm = import inputs.nixpkgs {
--- a/.devops/nix/package-gguf-py.nix
+++ b/.devops/nix/package-gguf-py.nix
@ -0,0 +1,36 @@
 # Derivation for the `gguf` Python package built from the in-tree `gguf-py`
 # directory. All Python dependencies are taken as explicit arguments.
 {
  lib,
  llamaVersion,
  numpy,
  tqdm,
  sentencepiece,
  pyyaml,
  poetry-core,
  buildPythonPackage,
  pytestCheckHook,
 }:
 let
  # Python packages propagated to consumers of `gguf`.
  runtimeDeps = [
    numpy
    tqdm
    sentencepiece
    pyyaml
  ];
 in
 buildPythonPackage {
  pname = "gguf";
  # The package version follows the llama.cpp version passed in by the scope.
  version = llamaVersion;
  pyproject = true;

  src = lib.cleanSource ../../gguf-py;

  nativeBuildInputs = [ poetry-core ];
  propagatedBuildInputs = runtimeDeps;

  # Checks: pytest via the hook, plus import checks for the listed modules.
  doCheck = true;
  nativeCheckInputs = [ pytestCheckHook ];
  pythonImportsCheck = [
    "numpy"
    "gguf"
  ];

  meta = {
    description = "Python package for writing binary files in the GGUF format";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.ditsuke ];
  };
 }
--- a/.devops/nix/package.nix
+++ b/.devops/nix/package.nix
@ -3,13 +3,11 @@
  glibc,
  config,
  stdenv,
  mkShell,
  runCommand,
  cmake,
  ninja,
  pkg-config,
  git,
  python3,
  mpi,
  blas,
  cudaPackages,
@ -20,15 +18,18 @@
  vulkan-loader,
  curl,
  shaderc,
-  useBlas ? builtins.all (x: !x) [
+  useBlas ?
-    useCuda
+    builtins.all (x: !x) [
-    useMetalKit
+      useCuda
-    useRocm
+      useMetalKit
-    useVulkan
+      useRocm
-  ] && blas.meta.available,
+      useVulkan
    ]
    && blas.meta.available,
  useCuda ? config.cudaSupport,
  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
-  useMpi ? false, # Increases the runtime closure size by ~700M
+  # Increases the runtime closure size by ~700M
  useMpi ? false,
  useRocm ? config.rocmSupport,
  enableCurl ? true,
  useVulkan ? false,
@ -38,8 +39,8 @@
  # otherwise we get libstdc++ errors downstream.
  effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
  enableStatic ? effectiveStdenv.hostPlatform.isStatic,
-  precompileMetalShaders ? false
+  precompileMetalShaders ? false,
-}@inputs:
+}:
 let
  inherit (lib)
@ -47,7 +48,6 @@ let
    cmakeFeature
    optionals
    strings
    versionOlder
    ;
  stdenv = throw "Use effectiveStdenv instead";
@ -63,54 +63,11 @@ let
  pnameSuffix =
    strings.optionalString (suffices != [ ])
      "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
-  descriptionSuffix =
+  descriptionSuffix = strings.optionalString (
-    strings.optionalString (suffices != [ ])
+    suffices != [ ]
-      ", accelerated with ${strings.concatStringsSep ", " suffices}";
+  ) ", accelerated with ${strings.concatStringsSep ", " suffices}";
-  executableSuffix = effectiveStdenv.hostPlatform.extensions.executable;
+  xcrunHost = runCommand "xcrunHost" { } ''
  # TODO: package the Python in this repository in a Nix-like way.
  # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
  # is PEP 517-compatible, and ensure the correct .dist-info is generated.
  # https://peps.python.org/pep-0517/
  #
  # TODO: Package up each Python script or service appropriately, by making
  # them into "entrypoints"
  llama-python = python3.withPackages (
    ps: [
      ps.numpy
      ps.sentencepiece
    ]
  );
  # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
  llama-python-extra = python3.withPackages (
    ps: [
      ps.numpy
      ps.sentencepiece
      ps.tiktoken
      ps.torchWithoutCuda
      ps.transformers
      # server bench
      ps.matplotlib
      # server tests
      ps.openai
      ps.behave
      ps.prometheus-client
      # for examples/pydantic-models-to-grammar-examples.py
      ps.docstring-parser
      ps.pydantic
      # for scripts/compare-llama-bench.py
      ps.gitpython
      ps.tabulate
    ]
  );
  xcrunHost = runCommand "xcrunHost" {} ''
    mkdir -p $out/bin
    ln -s /usr/bin/xcrun $out/bin
  '';
@ -145,178 +102,145 @@ let
  ];
 in
-effectiveStdenv.mkDerivation (
+effectiveStdenv.mkDerivation (finalAttrs: {
-  finalAttrs: {
+  pname = "llama-cpp${pnameSuffix}";
-    pname = "llama-cpp${pnameSuffix}";
+  version = llamaVersion;
    version = llamaVersion;
-    # Note: none of the files discarded here are visible in the sandbox or
+  # Note: none of the files discarded here are visible in the sandbox or
-    # affect the output hash. This also means they can be modified without
+  # affect the output hash. This also means they can be modified without
-    # triggering a rebuild.
+  # triggering a rebuild.
-    src = lib.cleanSourceWith {
+  src = lib.cleanSourceWith {
-      filter =
+    filter =
-        name: type:
+      name: type:
-        let
+      let
-          noneOf = builtins.all (x: !x);
+        noneOf = builtins.all (x: !x);
-          baseName = baseNameOf name;
+        baseName = baseNameOf name;
-        in
+      in
-        noneOf [
+      noneOf [
-          (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
+        (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
-          (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths
+        (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths
-          (lib.hasPrefix "." baseName) # Skip hidden files and directories
+        (lib.hasPrefix "." baseName) # Skip hidden files and directories
-          (baseName == "flake.lock")
+        (baseName == "flake.lock")
        ];
      src = lib.cleanSource ../../.;
    };
    postPatch = ''
      substituteInPlace ./ggml/src/ggml-metal.m \
        --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
      substituteInPlace ./ggml/src/ggml-metal.m \
        --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
    '';
    # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
    # `default.metallib` may be compiled with Metal compiler from XCode
    # and we need to escape sandbox on MacOS to access Metal compiler.
    # `xcrun` is used to find the path of the Metal compiler, which is variable
    # and not on $PATH
    # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
    __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
    nativeBuildInputs =
      [
        cmake
        ninja
        pkg-config
        git
      ]
      ++ optionals useCuda [
        cudaPackages.cuda_nvcc
        autoAddDriverRunpath
      ]
      ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [
        glibc.static
      ] ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [
        xcrunHost
      ];
    src = lib.cleanSource ../../.;
  };
-    buildInputs =
+  postPatch = ''
-      optionals effectiveStdenv.isDarwin darwinBuildInputs
+    substituteInPlace ./ggml/src/ggml-metal.m \
-      ++ optionals useCuda cudaBuildInputs
+      --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
-      ++ optionals useMpi [ mpi ]
+    substituteInPlace ./ggml/src/ggml-metal.m \
-      ++ optionals useRocm rocmBuildInputs
+      --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
-      ++ optionals useBlas [ blas ]
+  '';
      ++ optionals useVulkan vulkanBuildInputs
      ++ optionals enableCurl [ curl ];
-    cmakeFlags =
+  # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
-      [
+  # `default.metallib` may be compiled with Metal compiler from XCode
-        (cmakeBool "LLAMA_BUILD_SERVER" true)
+  # and we need to escape sandbox on MacOS to access Metal compiler.
-        (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
+  # `xcrun` is used to find the path of the Metal compiler, which is variable
-        (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
+  # and not on $PATH
-        (cmakeBool "LLAMA_CURL" enableCurl)
+  # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
-        (cmakeBool "GGML_NATIVE" false)
+  __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
-        (cmakeBool "GGML_BLAS" useBlas)
+
-        (cmakeBool "GGML_CUDA" useCuda)
+  nativeBuildInputs =
-        (cmakeBool "GGML_HIPBLAS" useRocm)
+    [
-        (cmakeBool "GGML_METAL" useMetalKit)
+      cmake
-        (cmakeBool "GGML_VULKAN" useVulkan)
+      ninja
-        (cmakeBool "GGML_STATIC" enableStatic)
+      pkg-config
-      ]
+      git
-      ++ optionals useCuda [
+    ]
-        (
+    ++ optionals useCuda [
-          with cudaPackages.flags;
+      cudaPackages.cuda_nvcc
-          cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
+
-            builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
+      autoAddDriverRunpath
-          )
+    ]
    ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
    ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];
  buildInputs =
    optionals effectiveStdenv.isDarwin darwinBuildInputs
    ++ optionals useCuda cudaBuildInputs
    ++ optionals useMpi [ mpi ]
    ++ optionals useRocm rocmBuildInputs
    ++ optionals useBlas [ blas ]
    ++ optionals useVulkan vulkanBuildInputs
    ++ optionals enableCurl [ curl ];
  cmakeFlags =
    [
      (cmakeBool "LLAMA_BUILD_SERVER" true)
      (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
      (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
      (cmakeBool "LLAMA_CURL" enableCurl)
      (cmakeBool "GGML_NATIVE" false)
      (cmakeBool "GGML_BLAS" useBlas)
      (cmakeBool "GGML_CUDA" useCuda)
      (cmakeBool "GGML_HIPBLAS" useRocm)
      (cmakeBool "GGML_METAL" useMetalKit)
      (cmakeBool "GGML_VULKAN" useVulkan)
      (cmakeBool "GGML_STATIC" enableStatic)
    ]
    ++ optionals useCuda [
      (
        with cudaPackages.flags;
        cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
          builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
        )
-      ]
+      )
-      ++ optionals useRocm [
+    ]
-        (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
+    ++ optionals useRocm [
-        (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
+      (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
-      ]
+      (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
-      ++ optionals useMetalKit [
+    ]
-        (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
+    ++ optionals useMetalKit [
-        (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
+      (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
-      ];
+      (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
    ];
-    # Environment variables needed for ROCm
+  # Environment variables needed for ROCm
-    env = optionals useRocm {
+  env = optionals useRocm {
-      ROCM_PATH = "${rocmPackages.clr}";
+    ROCM_PATH = "${rocmPackages.clr}";
-      HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
+    HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
-    };
+  };
-    # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
+  # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
-    # if they haven't been added yet.
+  # if they haven't been added yet.
-    postInstall = ''
+  postInstall = ''
-      mkdir -p $out/include
+    mkdir -p $out/include
-      cp $src/include/llama.h $out/include/
+    cp $src/include/llama.h $out/include/
-    '';
+  '';
-    # Define the shells here, but don't add in the inputsFrom to avoid recursion.
+  meta = {
-    passthru = {
+    # Configurations we don't want even the CI to evaluate. Results in the
-      inherit
+    # "unsupported platform" messages. This is mostly a no-op, because
-        useBlas
+    # cudaPackages would've refused to evaluate anyway.
-        useCuda
+    badPlatforms = optionals useCuda lib.platforms.darwin;
        useMetalKit
        useMpi
        useRocm
        useVulkan
        ;
-      shell = mkShell {
+    # Configurations that are known to result in build failures. Can be
-        name = "shell-${finalAttrs.finalPackage.name}";
+    # overridden by importing Nixpkgs with `allowBroken = true`.
-        description = "contains numpy and sentencepiece";
+    broken = (useMetalKit && !effectiveStdenv.isDarwin);
        buildInputs = [ llama-python ];
        inputsFrom = [ finalAttrs.finalPackage ];
        shellHook = ''
          addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
        '';
      };
-      shell-extra = mkShell {
+    description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
-        name = "shell-extra-${finalAttrs.finalPackage.name}";
+    homepage = "https://github.com/ggerganov/llama.cpp/";
-        description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
+    license = lib.licenses.mit;
        buildInputs = [ llama-python-extra ];
        inputsFrom = [ finalAttrs.finalPackage ];
      };
    };
-    meta = {
+    # Accommodates `nix run` and `lib.getExe`
-      # Configurations we don't want even the CI to evaluate. Results in the
+    mainProgram = "llama-cli";
      # "unsupported platform" messages. This is mostly a no-op, because
      # cudaPackages would've refused to evaluate anyway.
      badPlatforms = optionals useCuda lib.platforms.darwin;
-      # Configurations that are known to result in build failures. Can be
+    # These people might respond, on the best effort basis, if you ping them
-      # overridden by importing Nixpkgs with `allowBroken = true`.
+    # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
-      broken = (useMetalKit && !effectiveStdenv.isDarwin);
+    # Consider adding yourself to this list if you want to ensure this flake
    # stays maintained and you're willing to invest your time. Do not add
    # other people without their consent. Consider removing people after
    # they've been unreachable for long periods of time.
-      description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
+    # Note that lib.maintainers is defined in Nixpkgs, but you may just add
-      homepage = "https://github.com/ggerganov/llama.cpp/";
+    # an attrset following the same format as in
-      license = lib.licenses.mit;
+    # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
    maintainers = with lib.maintainers; [
      philiptaron
      SomeoneSerge
    ];
-      # Accommodates `nix run` and `lib.getExe`
+    # Extend `badPlatforms` instead
-      mainProgram = "llama-cli";
+    platforms = lib.platforms.all;
-
+  };
-      # These people might respond, on the best effort basis, if you ping them
+})
      # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
      # Consider adding yourself to this list if you want to ensure this flake
      # stays maintained and you're willing to invest your time. Do not add
      # other people without their consent. Consider removing people after
      # they've been unreachable for long periods of time.
      # Note that lib.maintainers is defined in Nixpkgs, but you may just add
      # an attrset following the same format as in
      # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
      maintainers = with lib.maintainers; [
        philiptaron
        SomeoneSerge
      ];
      # Extend `badPlatforms` instead
      platforms = lib.platforms.all;
    };
  }
 )
--- a/.devops/nix/python-scripts.nix
+++ b/.devops/nix/python-scripts.nix
@ -0,0 +1,66 @@
 # Derivation packaging the repository's Python scripts as `llama-scripts`.
 {
  lib,
  stdenv,
  buildPythonPackage,
  poetry-core,
  mkShell,
  python3Packages,
  gguf-py,
 }@inputs:
 let
  # Dependencies of the scripts. `gguf-py` is the function argument above;
  # every other entry is looked up in `python3Packages`.
  llama-python-deps = [
    python3Packages.numpy
    python3Packages.sentencepiece
    python3Packages.transformers
    python3Packages.protobuf
    python3Packages.torchWithoutCuda
    gguf-py
    python3Packages.tqdm

    # for scripts/compare-llama-bench.py
    python3Packages.gitpython
    python3Packages.tabulate

    # for examples/pydantic-models-to-grammar-examples.py
    python3Packages.docstring-parser
    python3Packages.pydantic
  ];

  # Packages that are only made available to the check phase.
  llama-python-test-deps = [
    # Server bench
    python3Packages.matplotlib

    # server tests
    python3Packages.openai
    python3Packages.behave
    python3Packages.prometheus-client
  ];

  # Source predicate: keep `*.py` paths, `README.md` and `pyproject.toml`.
  # NOTE(review): `type` is never inspected, so directories are judged by the
  # same rules as files; confirm that excluding nested scripts is intended.
  isScriptSource =
    name: type:
    let
      baseName = builtins.baseNameOf name;
    in
    lib.hasSuffix ".py" name || baseName == "README.md" || baseName == "pyproject.toml";
 in
 buildPythonPackage {
  pname = "llama-scripts";
  version = "0.0.0";
  pyproject = true;

  # NOTE: The files filtered out here are not visible in the build sandbox, neither
  # do they affect the output hash. They can be modified without triggering a rebuild.
  src = lib.cleanSourceWith {
    src = lib.cleanSource ../../.;
    filter = isScriptSource;
  };

  nativeBuildInputs = [ poetry-core ];
  dependencies = llama-python-deps;
  nativeCheckInputs = llama-python-test-deps;
 }
--- a/.devops/nix/scope.nix
+++ b/.devops/nix/scope.nix
@ -1,19 +1,41 @@
 {
  lib,
  newScope,
  python3,
  llamaVersion ? "0.0.0",
 }:
 let
  pythonPackages = python3.pkgs;
  buildPythonPackage = pythonPackages.buildPythonPackage;
  numpy = pythonPackages.numpy;
  tqdm = pythonPackages.tqdm;
  sentencepiece = pythonPackages.sentencepiece;
  pyyaml = pythonPackages.pyyaml;
  poetry-core = pythonPackages.poetry-core;
  pytestCheckHook = pythonPackages.pytestCheckHook;
 in
 # We're using `makeScope` instead of just writing out an attrset
 # because it allows users to apply overlays later using `overrideScope'`.
 # Cf. https://noogle.dev/f/lib/makeScope
-lib.makeScope newScope (
+lib.makeScope newScope (self: {
-  self: {
+  inherit llamaVersion;
-    inherit llamaVersion;
+  gguf-py = self.callPackage ./package-gguf-py.nix {
-    llama-cpp = self.callPackage ./package.nix { };
+    inherit
-    docker = self.callPackage ./docker.nix { };
+      buildPythonPackage
-    docker-min = self.callPackage ./docker.nix { interactive = false; };
+      numpy
-    sif = self.callPackage ./sif.nix { };
+      tqdm
-  }
+      sentencepiece
-)
+      poetry-core
      pyyaml
      pytestCheckHook
      ;
  };
  python-scripts = self.callPackage ./python-scripts.nix { inherit buildPythonPackage poetry-core; };
  llama-cpp = self.callPackage ./package.nix { };
  docker = self.callPackage ./docker.nix { };
  docker-min = self.callPackage ./docker.nix { interactive = false; };
  sif = self.callPackage ./sif.nix { };
 })
--- a/flake.nix
+++ b/flake.nix
@ -145,7 +145,9 @@
            # the same path you would with an overlay.
            legacyPackages = {
              llamaPackages = pkgs.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
-              llamaPackagesWindows = pkgs.pkgsCross.mingwW64.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
+              llamaPackagesWindows = pkgs.pkgsCross.mingwW64.callPackage .devops/nix/scope.nix {
                inherit llamaVersion;
              };
              llamaPackagesCuda = pkgsCuda.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
              llamaPackagesRocm = pkgsRocm.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
            };
@ -157,6 +159,7 @@
                default = config.legacyPackages.llamaPackages.llama-cpp;
                vulkan = config.packages.default.override { useVulkan = true; };
                windows = config.legacyPackages.llamaPackagesWindows.llama-cpp;
                python-scripts = config.legacyPackages.llamaPackages.python-scripts;
              }
              // lib.optionalAttrs pkgs.stdenv.isLinux {
                cuda = config.legacyPackages.llamaPackagesCuda.llama-cpp;
--- a/gguf-py/pyproject.toml
+++ b/gguf-py/pyproject.toml
@ -23,6 +23,7 @@ python = ">=3.8"
 numpy = ">=1.17"
 tqdm = ">=4.27"
 pyyaml = ">=5.1"
 sentencepiece = ">=0.1.98,<=0.2.0"
 [tool.poetry.dev-dependencies]
 pytest = "^5.2"
--- a/pyproject.toml
+++ b/pyproject.toml
@ -17,7 +17,7 @@ classifiers = [
 [tool.poetry.dependencies]
 python = ">=3.9"
 numpy = "^1.25.0"
-sentencepiece = ">=0.1.98,<0.2.0"
+sentencepiece = ">=0.1.98,<=0.2.0"
 transformers = ">=4.35.2,<5.0.0"
 protobuf = ">=4.21.0,<5.0.0"
 gguf = { path = "./gguf-py" }