Make the MUSA GPU architecture list configurable.

Summary of what this patch does:
  * Dockerfiles (full / llama-cli / llama-server): add a MUSA_DOCKER_ARCH
    build argument. When it is anything other than "default", its value is
    forwarded to CMake as -DMUSA_ARCHITECTURES=<value>. The option is appended
    to any CMAKE_ARGS already present in the environment instead of replacing
    it, so caller-supplied CMake options are not lost.
  * Makefile: replace MTGPU_TARGETS ("mp_21 mp_22") with MUSA_ARCHITECTURES
    ("21;22"); each entry becomes --cuda-gpu-arch=mp_<entry>. "-x musa -mtgpu"
    moves from the two compile recipes into MUSAFLAGS, using "+=" so that a
    pre-existing MUSAFLAGS value is still honoured.
  * ggml/src/ggml-musa/CMakeLists.txt: default MUSA_ARCHITECTURES to "21;22"
    when it is not defined, and build the per-source compile flags from it
    once, before the per-source loop.

NOTE(review): line breaks and indentation (including Makefile tabs) were
restored from a whitespace-collapsed copy of this patch; verify the context
lines against the tree before applying. The post-image blob ids on the
"index" lines are carried over from the original patch.

diff --git a/.devops/full-musa.Dockerfile b/.devops/full-musa.Dockerfile
index 575e81b48..3193fea1e 100644
--- a/.devops/full-musa.Dockerfile
+++ b/.devops/full-musa.Dockerfile
@@ -6,6 +6,9 @@ ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_V
 
 FROM ${BASE_MUSA_DEV_CONTAINER} AS build
 
+# MUSA architecture to build for (defaults to all supported archs)
+ARG MUSA_DOCKER_ARCH=default
+
 RUN apt-get update && \
     apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
 
@@ -19,7 +22,11 @@ WORKDIR /app
 
 COPY . .
 
-RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+# Use the default MUSA archs if not specified
+RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
+        export CMAKE_ARGS="${CMAKE_ARGS} -DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
+    fi && \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
     cmake --build build --config Release -j$(nproc) && \
     cp build/bin/* .
 
diff --git a/.devops/llama-cli-musa.Dockerfile b/.devops/llama-cli-musa.Dockerfile
index 3372749be..e7c75af20 100644
--- a/.devops/llama-cli-musa.Dockerfile
+++ b/.devops/llama-cli-musa.Dockerfile
@@ -8,6 +8,9 @@ ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU
 
 FROM ${BASE_MUSA_DEV_CONTAINER} AS build
 
+# MUSA architecture to build for (defaults to all supported archs)
+ARG MUSA_DOCKER_ARCH=default
+
 RUN apt-get update && \
     apt-get install -y build-essential git cmake
 
@@ -15,7 +18,11 @@ WORKDIR /app
 
 COPY . .
 
-RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+# Use the default MUSA archs if not specified
+RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
+        export CMAKE_ARGS="${CMAKE_ARGS} -DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
+    fi && \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
     cmake --build build --config Release --target llama-cli -j$(nproc) && \
     mkdir -p /app/lib && \
     find build -name "*.so" -exec cp {} /app/lib \;
diff --git a/.devops/llama-server-musa.Dockerfile b/.devops/llama-server-musa.Dockerfile
index eb67201c1..cebe51d42 100644
--- a/.devops/llama-server-musa.Dockerfile
+++ b/.devops/llama-server-musa.Dockerfile
@@ -8,6 +8,9 @@ ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU
 
 FROM ${BASE_MUSA_DEV_CONTAINER} AS build
 
+# MUSA architecture to build for (defaults to all supported archs)
+ARG MUSA_DOCKER_ARCH=default
+
 RUN apt-get update && \
     apt-get install -y build-essential git cmake libcurl4-openssl-dev
 
@@ -15,7 +18,11 @@ WORKDIR /app
 
 COPY . .
 
-RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+# Use the default MUSA archs if not specified
+RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
+        export CMAKE_ARGS="${CMAKE_ARGS} -DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
+    fi && \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
     cmake --build build --config Release --target llama-server -j$(nproc) && \
     mkdir -p /app/lib && \
     find build -name "*.so" -exec cp {} /app/lib \;
diff --git a/Makefile b/Makefile
index cfc74c1dc..9a079a237 100644
--- a/Makefile
+++ b/Makefile
@@ -815,7 +815,7 @@ ifdef GGML_MUSA
 	else
 		MUSA_PATH ?= /opt/musa
 	endif
-	MTGPU_TARGETS ?= mp_21 mp_22
+	MUSA_ARCHITECTURES ?= 21;22
 
 	MK_CPPFLAGS += -DGGML_USE_MUSA -DGGML_USE_CUDA
 	MK_LDFLAGS += -L$(MUSA_PATH)/lib -Wl,-rpath=$(MUSA_PATH)/lib
@@ -834,7 +834,8 @@ ifdef GGML_MUSA
 	CXX := $(MUSA_PATH)/bin/clang++
 	MCC := $(CCACHE) $(MUSA_PATH)/bin/mcc
 
-	MUSAFLAGS += $(addprefix --cuda-gpu-arch=, $(MTGPU_TARGETS))
+	MUSAFLAGS += -x musa -mtgpu
+	MUSAFLAGS += $(foreach arch,$(subst ;, ,$(MUSA_ARCHITECTURES)),--cuda-gpu-arch=mp_$(arch))
 
 ifdef GGML_CUDA_FORCE_MMQ
 	MUSAFLAGS += -DGGML_CUDA_FORCE_MMQ
@@ -878,14 +879,14 @@ ggml/src/ggml-cuda/ggml-cuda.o: \
 	ggml/src/ggml-backend-impl.h \
 	ggml/src/ggml-common.h \
 	$(wildcard ggml/src/ggml-cuda/*.cuh)
-	$(MCC) $(CXXFLAGS) $(MUSAFLAGS) -x musa -mtgpu -c -o $@ $<
+	$(MCC) $(CXXFLAGS) $(MUSAFLAGS) -c -o $@ $<
 
 ggml/src/ggml-cuda/%.o: \
 	ggml/src/ggml-cuda/%.cu \
 	ggml/include/ggml.h \
 	ggml/src/ggml-common.h \
 	ggml/src/ggml-cuda/common.cuh
-	$(MCC) $(CXXFLAGS) $(MUSAFLAGS) -x musa -mtgpu -c -o $@ $<
+	$(MCC) $(CXXFLAGS) $(MUSAFLAGS) -c -o $@ $<
 endif # GGML_MUSA
 
 ifdef GGML_METAL
diff --git a/ggml/src/ggml-musa/CMakeLists.txt b/ggml/src/ggml-musa/CMakeLists.txt
index e1a69186e..415b2b2e0 100644
--- a/ggml/src/ggml-musa/CMakeLists.txt
+++ b/ggml/src/ggml-musa/CMakeLists.txt
@@ -20,6 +20,11 @@ find_package(MUSAToolkit)
 if (MUSAToolkit_FOUND)
     message(STATUS "MUSA Toolkit found")
 
+    if (NOT DEFINED MUSA_ARCHITECTURES)
+        set(MUSA_ARCHITECTURES "21;22")
+    endif()
+    message(STATUS "Using MUSA architectures: ${MUSA_ARCHITECTURES}")
+
     file(GLOB GGML_HEADERS_MUSA "../ggml-cuda/*.cuh")
     list(APPEND GGML_HEADERS_MUSA "../../include/ggml-cuda.h")
 
@@ -44,7 +49,12 @@ if (MUSAToolkit_FOUND)
 
     set_source_files_properties(${GGML_SOURCES_MUSA} PROPERTIES LANGUAGE CXX)
+    # The flag string is identical for every source, so assemble it once.
+    set(MUSA_COMPILE_FLAGS "-x musa -mtgpu")
+    foreach(ARCH ${MUSA_ARCHITECTURES})
+        string(APPEND MUSA_COMPILE_FLAGS " --cuda-gpu-arch=mp_${ARCH}")
+    endforeach()
     foreach(SOURCE ${GGML_SOURCES_MUSA})
-        set_property(SOURCE ${SOURCE} PROPERTY COMPILE_FLAGS "-x musa -mtgpu --cuda-gpu-arch=mp_21 --cuda-gpu-arch=mp_22")
+        set_property(SOURCE ${SOURCE} PROPERTY COMPILE_FLAGS "${MUSA_COMPILE_FLAGS}")
     endforeach()
 
     ggml_add_backend_library(ggml-musa