llama.cpp/ggml-kompute.h

#pragma once

#include <cstddef>
#include <vector>
#include <string>

struct ggml_kompute_context;

// Forward declarations so this header can refer to vk::DeviceMemory and
// vk::Buffer through pointers without pulling in their full definitions.
// A namespace body is closed by a bare `}`; a trailing semicolon would be a
// stray empty declaration that pedantic warning levels flag.
namespace vk {
    class DeviceMemory;
    class Buffer;
}

// Plain record describing one memory allocation: a raw data pointer, a size,
// and a (memory, buffer) handle pair for each of "primary" and "staging".
// It is the type returned by ggml_vk_allocate() and taken by
// ggml_vk_free_memory() / ggml_vk_add_buffer().
// Every member defaults to null / zero.
// NOTE(review): ownership of the pointed-to vk objects is not visible from
// this header — confirm against the implementation before copying instances.
struct ggml_vk_memory {
    void *data{nullptr};                        // NOTE(review): presumably the mapped host pointer — confirm
    size_t size{0};                             // NOTE(review): presumably the allocation size in bytes — confirm
    vk::DeviceMemory *primaryMemory{nullptr};
    vk::Buffer *primaryBuffer{nullptr};
    vk::DeviceMemory *stagingMemory{nullptr};
    vk::Buffer *stagingBuffer{nullptr};
};

// Descriptor for a single device, as returned by ggml_vk_available_devices()
// and ggml_vk_current_device(). Numeric members default to zero and the
// strings default to empty.
struct ggml_vk_device {
    int index{0};
    int type{0};            // same as VkPhysicalDeviceType
    size_t heapSize{0};     // NOTE(review): presumably bytes — confirm
    std::string name;
    std::string vendor;
    int subgroupSize{0};
};

// Device enumeration, selection, teardown and status queries.
// Only the declarations are visible in this header; the notes marked
// NOTE(review) are inferred from the signatures and must be confirmed against
// the implementation.

// Returns a list of device descriptors.
// NOTE(review): memoryRequired is presumably a byte count used to filter out
// devices that are too small — confirm.
std::vector<ggml_vk_device> ggml_vk_available_devices(size_t memoryRequired);
// Three ways to select a device: by (memoryRequired, string), by descriptor,
// or by integer. Each returns a bool.
// NOTE(review): the string is presumably a device or vendor name, the int
// presumably a ggml_vk_device::index, and the bool presumably reports
// success — confirm.
bool ggml_vk_init_device(size_t memoryRequired, const std::string &device);
bool ggml_vk_init_device(const ggml_vk_device &device);
bool ggml_vk_init_device(int device);
// NOTE(review): presumably releases the device selected by
// ggml_vk_init_device(); meaning of the bool result is not visible — confirm.
bool ggml_vk_free_device();
// Boolean status queries; the exact conditions each one reports are defined
// by the implementation.
bool ggml_vk_has_vulkan();
bool ggml_vk_has_device();
bool ggml_vk_using_vulkan();
// Returns a ggml_vk_device descriptor by value.
// NOTE(review): the result when no device is initialised is not visible
// here — confirm.
ggml_vk_device ggml_vk_current_device();
// Context lifecycle and memory allocation helpers (declarations only).

// Returns a pointer to a ggml_kompute_context, an opaque type that is only
// forward-declared in this header.
// NOTE(review): presumably paired with ggml_vk_free() — confirm.
struct ggml_kompute_context * ggml_vk_init(void);
// NOTE(review): presumably reports whether ggml_vk_h2d_all() has already been
// run for ctx — confirm.
bool ggml_vk_has_h2d_all(struct ggml_kompute_context * ctx);
void ggml_vk_free(struct ggml_kompute_context * ctx);
// Maps an offset to a size_t.
// NOTE(review): presumably rounds up to a device alignment requirement — confirm.
size_t ggml_vk_aligned_offset(size_t offset);
// Returns a ggml_vk_memory record by value for the requested size.
// NOTE(review): size is presumably in bytes — confirm.
ggml_vk_memory ggml_vk_allocate(size_t size);
// Takes the record by non-const reference, so the callee is able to modify it.
// NOTE(review): presumably resets the handles after releasing them — confirm.
void ggml_vk_free_memory(ggml_vk_memory &memory);

// Associates a ggml_vk_memory record with a context under a C-string name.
// The record is passed by const reference, so this call cannot modify it.
// NOTE(review): whether ctx copies the record or keeps a reference to it, and
// the required lifetime of `name`, are not visible here — confirm.
void ggml_vk_add_buffer(
    struct ggml_kompute_context * ctx,
    const char * name,
    const ggml_vk_memory &memory);

// Data-transfer and graph-execution entry points (declarations only).
// ggml_tensor and ggml_cgraph are not declared in this header; they are named
// here via elaborated type specifiers (`struct X *`).
// NOTE(review): "h2d" / "d2h" presumably mean host-to-device / device-to-host
// copies, with the *_all variants covering every buffer registered on ctx and
// the *_tensor variants a single tensor — confirm against the implementation.
void ggml_vk_h2d_all(struct ggml_kompute_context * ctx);
void ggml_vk_d2h_all(struct ggml_kompute_context * ctx);
void ggml_vk_h2d_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t);
void ggml_vk_d2h_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t);
// NOTE(review): presumably evaluates the compute graph gf using ctx — confirm.
void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf);
Nomic vulkan backend licensed under the Software for Open Models License (SOM), version 1.0. 2023-06-22 10:58:07 +00:00			`#pragma once`

			`#include <cstddef>`
			`#include <vector>`
			`#include <string>`

			`struct ggml_kompute_context;`

			`namespace vk {`
			`class DeviceMemory;`
			`class Buffer;`
			`};`

			`struct ggml_vk_memory {`
			`void *data = nullptr;`
			`size_t size = 0;`
			`vk::DeviceMemory *primaryMemory = nullptr;`
			`vk::Buffer *primaryBuffer = nullptr;`
			`vk::DeviceMemory *stagingMemory = nullptr;`
			`vk::Buffer *stagingBuffer = nullptr;`
			`};`

			`struct ggml_vk_device {`
			`int index = 0;`
			`int type = 0; // same as VkPhysicalDeviceType`
			`size_t heapSize = 0;`
			`std::string name;`
			`std::string vendor;`
Consolidate code for mat x vec kernels and use subgroups more extensively. 2023-09-29 14:02:22 +00:00			`int subgroupSize = 0;`
Nomic vulkan backend licensed under the Software for Open Models License (SOM), version 1.0. 2023-06-22 10:58:07 +00:00			`};`

			`std::vector<ggml_vk_device> ggml_vk_available_devices(size_t memoryRequired);`
			`bool ggml_vk_init_device(size_t memoryRequired, const std::string &device);`
			`bool ggml_vk_init_device(const ggml_vk_device &device);`
			`bool ggml_vk_init_device(int device);`
Completely revamp how we do object management with the vulkan backend and stop using so many static objects so we can tear down and bring up vulkan on new devices in the same runtime. 2023-09-12 17:04:55 +00:00			`bool ggml_vk_free_device();`
Switch to a dynamic dispatch table instead of linking hard against libvulkan. 2023-09-12 16:39:38 +00:00			`bool ggml_vk_has_vulkan();`
Nomic vulkan backend licensed under the Software for Open Models License (SOM), version 1.0. 2023-06-22 10:58:07 +00:00			`bool ggml_vk_has_device();`
Only use vulkan with known quant that work. 2023-09-14 13:58:28 +00:00			`bool ggml_vk_using_vulkan();`
Nomic vulkan backend licensed under the Software for Open Models License (SOM), version 1.0. 2023-06-22 10:58:07 +00:00			`ggml_vk_device ggml_vk_current_device();`
			`struct ggml_kompute_context * ggml_vk_init(void);`
			`bool ggml_vk_has_h2d_all(struct ggml_kompute_context * ctx);`
			`void ggml_vk_free(struct ggml_kompute_context * ctx);`
			`size_t ggml_vk_aligned_offset(size_t offset);`
			`ggml_vk_memory ggml_vk_allocate(size_t size);`
			`void ggml_vk_free_memory(ggml_vk_memory &memory);`

			`void ggml_vk_add_buffer(`
			`struct ggml_kompute_context * ctx,`
			`const char * name,`
			`const ggml_vk_memory &memory);`

			`void ggml_vk_h2d_all(struct ggml_kompute_context * ctx);`
			`void ggml_vk_d2h_all(struct ggml_kompute_context * ctx);`
			`void ggml_vk_h2d_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t);`
			`void ggml_vk_d2h_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t);`
			`void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf);`