// llama.cpp/ggml-kompute.h

#pragma once

#include "ggml-backend.h"

#include <cstddef>
#include <vector>
#include <string>

// Opaque backend context. It is only declared in this header and is always
// handled through a pointer: ggml_vk_init() returns one, and ggml_vk_free()
// and the other ctx-taking functions below accept it.
struct ggml_kompute_context;

// Forward declarations of the two vk:: classes that ggml_vk_memory refers to.
// They are only declared (not defined) here; ggml_vk_memory stores nothing but
// pointers to them, so incomplete types are sufficient in this header.
// NOTE(review): these are presumably the Vulkan-Hpp handle classes - confirm
// that the real definitions are classes (not structs/aliases) where included.
namespace vk {
    class DeviceMemory;
    class Buffer;
} // namespace vk (no trailing ';': a namespace is not terminated by one)

// Bookkeeping record for one allocation of the Kompute (Vulkan) backend.
// Values of this type are returned by ggml_vk_allocate() and passed to
// ggml_vk_free_memory() and ggml_vk_add_buffer(). A default-constructed record
// has every pointer null and a size of 0.
// NOTE(review): the primary/staging split presumably separates the device
// allocation from a host-side transfer buffer - confirm in the implementation.
struct ggml_vk_memory {
    // Raw pointer to the allocation's data.
    // NOTE(review): presumably a host-visible mapping - confirm.
    void * data{nullptr};

    // Size associated with the allocation.
    // NOTE(review): presumably in bytes - confirm.
    size_t size{0};

    // Vulkan memory object and buffer of the primary allocation.
    vk::DeviceMemory * primaryMemory{nullptr};
    vk::Buffer * primaryBuffer{nullptr};

    // Vulkan memory object and buffer of the staging allocation.
    vk::DeviceMemory * stagingMemory{nullptr};
    vk::Buffer * stagingBuffer{nullptr};
};

// Description of one device as reported by ggml_vk_available_devices() and
// ggml_vk_current_device(); it can also be passed to ggml_vk_init_device().
// A default-constructed record has all numeric fields 0 and empty strings.
struct ggml_vk_device {
    // Numeric identifier of the device.
    // NOTE(review): presumably the value accepted by ggml_vk_init_device(int) - confirm.
    int index{0};

    // same as VkPhysicalDeviceType
    int type{0};

    // Heap size reported for the device.
    // NOTE(review): presumably in bytes - confirm.
    size_t heapSize{0};

    // Device name and vendor name as text.
    std::string name;
    std::string vendor;

    // Subgroup size of the device.
    // NOTE(review): presumably the Vulkan subgroup width in invocations - confirm.
    int subgroupSize{0};
};

// Low-level device, context and memory entry points of the Kompute (Vulkan)
// backend. These declarations are C++-only (they use std::vector, std::string
// and references). Their implementations are not part of this header, so every
// remark tagged NOTE(review) is an assumption to confirm against them.
//
// Returns a list of ggml_vk_device records.
// NOTE(review): memoryRequired presumably excludes devices with too little
// memory and is presumably a byte count - confirm.
std::vector<ggml_vk_device> ggml_vk_available_devices(size_t memoryRequired);
// Three overloads that take a device selection: (memoryRequired, string),
// a ggml_vk_device record, or an int.
// NOTE(review): the string presumably names a device or vendor, the int is
// presumably ggml_vk_device::index, and the bool result presumably reports
// success - confirm.
bool ggml_vk_init_device(size_t memoryRequired, const std::string &device);
bool ggml_vk_init_device(const ggml_vk_device &device);
bool ggml_vk_init_device(int device);
// NOTE(review): presumably releases the device chosen by ggml_vk_init_device();
// meaning of the bool result is not visible here - confirm.
bool ggml_vk_free_device();
// State queries returning bool.
// NOTE(review): presumably "Vulkan is available", "a device has been
// initialized" and "Vulkan is currently in use" respectively - confirm.
bool ggml_vk_has_vulkan();
bool ggml_vk_has_device();
bool ggml_vk_using_vulkan();
// Returns a ggml_vk_device record by value.
// NOTE(review): presumably describes the currently initialized device - confirm.
ggml_vk_device ggml_vk_current_device();
// ggml_vk_init() returns a pointer to the opaque ggml_kompute_context;
// ggml_vk_free() takes such a pointer.
// NOTE(review): presumably create/destroy a pair - confirm ownership rules.
struct ggml_kompute_context * ggml_vk_init(void);
// NOTE(review): presumably reports whether ggml_vk_h2d_all() has already been
// performed for ctx - confirm.
bool ggml_vk_has_h2d_all(struct ggml_kompute_context * ctx);
void ggml_vk_free(struct ggml_kompute_context * ctx);
// Maps an offset to an offset.
// NOTE(review): presumably rounds to the device's required buffer alignment;
// rounding direction is not visible here - confirm.
size_t ggml_vk_aligned_offset(size_t offset);
// ggml_vk_allocate() returns a ggml_vk_memory record for the requested size;
// ggml_vk_free_memory() takes such a record by non-const reference.
// NOTE(review): the non-const reference presumably lets the callee reset the
// record after releasing it - confirm.
ggml_vk_memory ggml_vk_allocate(size_t size);
void ggml_vk_free_memory(ggml_vk_memory &memory);

// Takes a context, a C-string name and a ggml_vk_memory record (by const
// reference, so the record itself is not modified through this parameter).
// NOTE(review): presumably registers the allocation with ctx under `name`;
// whether ctx keeps a copy of the record or of the name string is not visible
// in this header - confirm lifetime requirements in the implementation.
void ggml_vk_add_buffer(
    struct ggml_kompute_context * ctx,
    const char * name,
    const ggml_vk_memory &memory);

// Transfer and compute entry points; all operate on a ggml_kompute_context.
// NOTE(review): "h2d"/"d2h" presumably mean host-to-device / device-to-host
// copies; the *_all variants presumably cover every buffer known to ctx and
// the *_tensor variants only the data of tensor t - confirm.
void ggml_vk_h2d_all(struct ggml_kompute_context * ctx);
void ggml_vk_d2h_all(struct ggml_kompute_context * ctx);
void ggml_vk_h2d_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t);
void ggml_vk_d2h_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t);
// Takes a context and a ggml_cgraph.
// NOTE(review): presumably evaluates graph gf on the device; whether the call
// blocks until completion is not visible in this header - confirm.
void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf);

//
// backend API
// user-code should use only these functions
//

// Give the backend API below C linkage when compiled as C++.
// Note that the earlier part of this header uses C++-only constructs
// (<vector>, <string>, namespace vk), so the header as a whole still has to be
// included from C++.
#ifdef __cplusplus
extern "C" {
#endif

// forward declaration
// (ggml_backend_t is a pointer to the opaque struct ggml_backend, which is not
// defined in this header.)
typedef struct ggml_backend * ggml_backend_t;

// Returns a ggml_backend_t handle for the Kompute backend.
// NOTE(review): failure behaviour (e.g. a null result) and how the handle is
// released are not visible in this header - confirm.
// GGML_API is not defined in this file; presumably an export/visibility macro
// provided via "ggml-backend.h".
GGML_API ggml_backend_t ggml_backend_kompute_init(void);

// NOTE(review): presumably true when `backend` was created by
// ggml_backend_kompute_init() - confirm.
GGML_API bool ggml_backend_is_kompute(ggml_backend_t backend);

// Returns the buffer type handle associated with this backend.
// ggml_backend_buffer_type_t is not declared in this file; presumably it comes
// from "ggml-backend.h".
GGML_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(void);

#ifdef __cplusplus
}
#endif
Nomic vulkan backend licensed under the Software for Open Models License (SOM), version 1.0. 2023-06-22 10:58:07 +00:00			`#pragma once`

kompute : initial attempt at ggml-backend v2 support 2024-01-09 21:24:10 +00:00			`#include "ggml-backend.h"`

Nomic vulkan backend licensed under the Software for Open Models License (SOM), version 1.0. 2023-06-22 10:58:07 +00:00			`#include <cstddef>`
			`#include <vector>`
			`#include <string>`

			`struct ggml_kompute_context;`

			`namespace vk {`
			`class DeviceMemory;`
			`class Buffer;`
			`};`

			`struct ggml_vk_memory {`
			`void *data = nullptr;`
			`size_t size = 0;`
			`vk::DeviceMemory *primaryMemory = nullptr;`
			`vk::Buffer *primaryBuffer = nullptr;`
			`vk::DeviceMemory *stagingMemory = nullptr;`
			`vk::Buffer *stagingBuffer = nullptr;`
			`};`

			`struct ggml_vk_device {`
			`int index = 0;`
			`int type = 0; // same as VkPhysicalDeviceType`
			`size_t heapSize = 0;`
			`std::string name;`
			`std::string vendor;`
Consolidate code for mat x vec kernels and use subgroups more extensively. 2023-09-29 14:02:22 +00:00			`int subgroupSize = 0;`
Nomic vulkan backend licensed under the Software for Open Models License (SOM), version 1.0. 2023-06-22 10:58:07 +00:00			`};`

			`std::vector<ggml_vk_device> ggml_vk_available_devices(size_t memoryRequired);`
			`bool ggml_vk_init_device(size_t memoryRequired, const std::string &device);`
			`bool ggml_vk_init_device(const ggml_vk_device &device);`
			`bool ggml_vk_init_device(int device);`
Completely revamp how we do object management with the vulkan backend and stop using so many static objects so we can tear down and bring up vulkan on new devices in the same runtime. 2023-09-12 17:04:55 +00:00			`bool ggml_vk_free_device();`
Switch to a dynamic dispatch table instead of linking hard against libvulkan. 2023-09-12 16:39:38 +00:00			`bool ggml_vk_has_vulkan();`
Nomic vulkan backend licensed under the Software for Open Models License (SOM), version 1.0. 2023-06-22 10:58:07 +00:00			`bool ggml_vk_has_device();`
Only use vulkan with known quant that work. 2023-09-14 13:58:28 +00:00			`bool ggml_vk_using_vulkan();`
Nomic vulkan backend licensed under the Software for Open Models License (SOM), version 1.0. 2023-06-22 10:58:07 +00:00			`ggml_vk_device ggml_vk_current_device();`
			`struct ggml_kompute_context * ggml_vk_init(void);`
			`bool ggml_vk_has_h2d_all(struct ggml_kompute_context * ctx);`
			`void ggml_vk_free(struct ggml_kompute_context * ctx);`
			`size_t ggml_vk_aligned_offset(size_t offset);`
			`ggml_vk_memory ggml_vk_allocate(size_t size);`
			`void ggml_vk_free_memory(ggml_vk_memory &memory);`

			`void ggml_vk_add_buffer(`
			`struct ggml_kompute_context * ctx,`
			`const char * name,`
			`const ggml_vk_memory &memory);`

			`void ggml_vk_h2d_all(struct ggml_kompute_context * ctx);`
			`void ggml_vk_d2h_all(struct ggml_kompute_context * ctx);`
			`void ggml_vk_h2d_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t);`
			`void ggml_vk_d2h_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t);`
			`void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf);`
kompute : initial attempt at ggml-backend v2 support 2024-01-09 21:24:10 +00:00
			`//`
			`// backend API`
			`// user-code should use only these functions`
			`//`

attempt to get test-backend-ops working 2024-01-10 21:14:03 +00:00			`#ifdef __cplusplus`
			`extern "C" {`
			`#endif`

kompute : initial attempt at ggml-backend v2 support 2024-01-09 21:24:10 +00:00			`// forward declaration`
			`typedef struct ggml_backend * ggml_backend_t;`

			`GGML_API ggml_backend_t ggml_backend_kompute_init(void);`

			`GGML_API bool ggml_backend_is_kompute(ggml_backend_t backend);`

			`GGML_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(void);`
attempt to get test-backend-ops working 2024-01-10 21:14:03 +00:00
			`#ifdef __cplusplus`
			`}`
			`#endif`