merge fixup (e16b9fa4ba)

2025-01-05 00:04:36 +00:00 · 2023-11-14 15:54:26 -05:00 · 2023-11-14 15:54:26 -05:00 · 02c3309f6d
commit 02c3309f6d
parent 9c4dfd06e8
1 changed file with 23 additions and 15 deletions
--- a/llama.cpp
+++ b/llama.cpp
@@ -3506,6 +3506,10 @@ struct llm_build_context {
    llama_buffer & buf_compute;
 #if defined(GGML_USE_KOMPUTE)
    ggml_kompute_context * ctx_kompute;
 #endif
    struct ggml_context * ctx0 = nullptr;
    // TODO: consider making the entire interface noexcept
@@ -3535,7 +3539,11 @@ struct llm_build_context {
        kv_head       (worst_case ? n_ctx - n_tokens : kv_self.head),
        do_rope_shift (worst_case || kv_self.has_shift),
        cb            (cb),
-        buf_compute   (lctx.buf_compute) {
+        buf_compute   (lctx.buf_compute)
 #if defined(GGML_USE_KOMPUTE)
      , ctx_kompute   (lctx.ctx_kompute)
 #endif
        {
            GGML_ASSERT(!!kv_self.ctx);
            // all initializations should be done in init()
@@ -3662,15 +3670,15 @@ struct llm_build_context {
        ggml_build_forward_expand(gf, cur);
 #if defined(GGML_USE_KOMPUTE)
-        if (lctx.ctx_kompute) {
+        if (ctx_kompute) {
-            if (!ggml_vk_has_h2d_all(lctx.ctx_kompute)) {
+            if (!ggml_vk_has_h2d_all(ctx_kompute)) {
-                ggml_vk_h2d_all(lctx.ctx_kompute);
+                ggml_vk_h2d_all(ctx_kompute);
            } else {
-                ggml_vk_h2d_tensor(lctx.ctx_kompute, to_device_tensor);
+                ggml_vk_h2d_tensor(ctx_kompute, to_device_tensor);
-                ggml_vk_h2d_tensor(lctx.ctx_kompute, inp_pos);
+                ggml_vk_h2d_tensor(ctx_kompute, inp_pos);
-                ggml_vk_h2d_tensor(lctx.ctx_kompute, KQ_mask);
+                ggml_vk_h2d_tensor(ctx_kompute, KQ_mask);
                if (K_shift) {
-                    ggml_vk_h2d_tensor(lctx.ctx_kompute, K_shift);
+                    ggml_vk_h2d_tensor(ctx_kompute, K_shift);
                }
            }
        }
@@ -3907,15 +3915,15 @@ struct llm_build_context {
        ggml_build_forward_expand(gf, cur);
 #if defined(GGML_USE_KOMPUTE)
-        if (lctx.ctx_kompute) {
+        if (ctx_kompute) {
-            if (!ggml_vk_has_h2d_all(lctx.ctx_kompute)) {
+            if (!ggml_vk_has_h2d_all(ctx_kompute)) {
-                ggml_vk_h2d_all(lctx.ctx_kompute);
+                ggml_vk_h2d_all(ctx_kompute);
            } else {
-                ggml_vk_h2d_tensor(lctx.ctx_kompute, to_device_tensor);
+                ggml_vk_h2d_tensor(ctx_kompute, to_device_tensor);
-                ggml_vk_h2d_tensor(lctx.ctx_kompute, inp_pos);
+                ggml_vk_h2d_tensor(ctx_kompute, inp_pos);
-                ggml_vk_h2d_tensor(lctx.ctx_kompute, KQ_mask);
+                ggml_vk_h2d_tensor(ctx_kompute, KQ_mask);
                if (K_shift) {
-                    ggml_vk_h2d_tensor(lctx.ctx_kompute, K_shift);
+                    ggml_vk_h2d_tensor(ctx_kompute, K_shift);
                }
            }
        }