mirror of https://github.com/ggerganov/llama.cpp.git (synced 2024-12-27 20:04:35 +00:00)
fix llava implementation
commit 4d1804330e
parent d7eca255d7
File diff suppressed because it is too large; selected hunks below.
@@ -446,7 +446,7 @@
             ).join("\n"),
         });
         if(selected_image) {
-            prompt = `A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER: [img-10]${msg}\nASSISTANT:`;
+            prompt = `A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:[img-10]${msg}\nASSISTANT:`;
         }
         await runLlama(prompt, {
             ...params.value,
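The only change here is dropping the space after `USER:`, so the `[img-10]` placeholder sits directly against the tag in the form the server's prompt splitter expects. As a rough illustration of that server-side matching (a minimal sketch; find_image_tag is a hypothetical helper, not the actual server.cpp code):

    #include <cstdio>
    #include <string>

    // Hypothetical helper: locate an "[img-N]" placeholder so the prompt can
    // be split into the text before and after the image embedding.
    static bool find_image_tag(const std::string & prompt, int id, size_t & pos, size_t & len) {
        char tag[32];
        len = (size_t) snprintf(tag, sizeof(tag), "[img-%d]", id);
        pos = prompt.find(tag);
        return pos != std::string::npos;
    }

    int main() {
        std::string prompt = "USER:[img-10]describe this image\nASSISTANT:";
        size_t pos, len;
        if (find_image_tag(prompt, 10, pos, len)) {
            printf("prefix: '%s'\n", prompt.substr(0, pos).c_str());
            printf("suffix: '%s'\n", prompt.substr(pos + len).c_str());
        }
        return 0;
    }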
@@ -285,7 +285,7 @@ struct llama_client_slot
     int32_t n_past = 0;
     int32_t n_decoded = 0;
     int32_t i_batch = -1;
-    int32_t num_prompt_tokens = 0;
+    size_t num_prompt_tokens = 0;
     int32_t num_prompt_tokens_processed = 0;
     int32_t n_remaining = -1;

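Switching num_prompt_tokens from int32_t to size_t presumably lines it up with the std::vector sizes it is compared against, silencing sign-compare warnings. A minimal sketch of the mismatch:

    #include <cstdint>
    #include <vector>

    int main() {
        std::vector<int32_t> prompt_tokens(512);
        // As int32_t this comparison mixes signed and unsigned operands and
        // trips -Wsign-compare; as size_t both sides have the same type.
        size_t num_prompt_tokens = prompt_tokens.size();
        return num_prompt_tokens == prompt_tokens.size() ? 0 : 1;
    }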
@@ -347,6 +347,15 @@ struct llama_client_slot
         ctx_sampling.grammar = NULL;
     }

+#ifdef SERVER_MULTIMODAL_SUPPORT
+        for(slot_image img : images) {
+            free(img.image_embedding);
+            delete[] img.img_data.data;
+            img.prefix_prompt = "";
+        }
+        images.clear();
+#endif
+
     // llama_set_rng_seed(ctx, params.seed); in batched the seed matter???????
 }

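Note the new cleanup loop takes each slot_image by value: freeing through the copy does release the buffers, and images.clear() then discards the (briefly dangling) originals. A by-reference variant that also nulls the freed pointers would be the more defensive form, sketched here with an assumed minimal slot_image layout based only on the fields the diff touches:

    #include <cstdlib>
    #include <string>
    #include <vector>

    // Assumed minimal shape of slot_image, inferred from the hunk above; the
    // real struct in server.cpp has more members.
    struct clip_image { unsigned char * data = nullptr; int nx = 0, ny = 0; size_t size = 0; };
    struct slot_image {
        float *     image_embedding = nullptr; // malloc'd embedding buffer
        clip_image  img_data;                  // new[]'d pixel buffer
        std::string prefix_prompt;
    };

    // Equivalent cleanup by reference: no dangling pointers linger between
    // the frees and the final clear().
    static void free_images(std::vector<slot_image> & images) {
        for (slot_image & img : images) {
            free(img.image_embedding);
            img.image_embedding = nullptr;
            delete[] img.img_data.data;
            img.img_data.data = nullptr;
            img.prefix_prompt.clear();
        }
        images.clear();
    }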
@@ -893,7 +902,7 @@ struct llama_server_context
                 const auto json_prompt = (image_idx >= slot.images.size()) ?
                     slot.params.input_suffix : // no more images, then process suffix prompt
                     (json)(slot.images[image_idx].prefix_prompt);
-                std::vector<llama_token> append_tokens = tokenize(json_prompt, true); // has next image
+                std::vector<llama_token> append_tokens = tokenize(json_prompt, false); // has next image
                 for (int i = 0; i < append_tokens.size(); ++i) {
                     batch.token [batch.n_tokens] = append_tokens[i];
                     batch.pos   [batch.n_tokens] = slot.n_past;
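The flipped boolean is presumably tokenize()'s add-BOS flag: prompt fragments appended after an image embedding must not start with a fresh BOS token, only the very first fragment should. A hedged sketch of the intent (tokenize_chunk stands in for the server's helper; the "tokenizer" body is a placeholder):

    #include <cstdint>
    #include <string>
    #include <vector>

    using llama_token = int32_t;

    // Stand-in for the server's tokenize() helper: prepend BOS only on
    // request. The real helper forwards to llama_tokenize() on the model.
    static std::vector<llama_token> tokenize_chunk(const std::string & text, bool add_bos) {
        std::vector<llama_token> tokens;
        if (add_bos) {
            tokens.push_back(1); // BOS id in LLaMA-family vocabularies
        }
        for (char c : text) { tokens.push_back((llama_token) c); } // placeholder "tokenizer"
        return tokens;
    }

    int main() {
        // Only the leading chunk carries BOS; chunks after an image do not.
        auto prefix = tokenize_chunk("USER:", true);
        auto suffix = tokenize_chunk("\nASSISTANT:", false);
        return prefix.empty() || suffix.empty();
    }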
@@ -1810,7 +1819,6 @@ static void parse_options_completion(const json &body, llama_client_slot* slot,
     const auto &images_data = body.find("image_data");
     if (images_data != body.end() && images_data->is_array())
     {
-        slot->images.clear();
         for (const auto &img : *images_data)
         {
             slot_image img_sl;
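With the slot now clearing its images on reset (see the hunk above), the clear() here becomes redundant and is dropped. For context, this loop walks the request's "image_data" array, whose entries carry a numeric id plus base64 pixel data; the helper below is an illustrative sketch of reading one entry, not the server's implementation, and the field names are inferred from img_sl.id and data_b64 in the surrounding code:

    #include <string>
    #include "json.hpp" // nlohmann::json, vendored by the server example
    using json = nlohmann::json;

    // Illustrative only: extract one "image_data" entry of the assumed form
    // {"id": 10, "data": "<base64>"}.
    static void read_image_entry(const json & img, int & id, std::string & data_b64) {
        id       = img.value("id", 0);
        data_b64 = img.value("data", std::string());
    }

    int main() {
        json img = { {"id", 10}, {"data", "aGVsbG8="} };
        int id; std::string data_b64;
        read_image_entry(img, id, data_b64);
        return id == 10 ? 0 : 1;
    }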
@@ -1821,10 +1829,10 @@ static void parse_options_completion(const json &body, llama_client_slot* slot,
             data_b64.clear();
             auto data = stbi_load_from_memory(image_buffer.data(), image_buffer.size(), &width, &height, &channels, 3);
             if(!data) {
-                LOG_TEE("slot %i - failed to load image\n", slot->id);
+                LOG_TEE("slot %i - failed to load image id= %i\n", slot->id, img_sl.id);
                 return;
             }
-            LOG_TEE("slot %i - RGB image %i loaded (%i x %i)\n", slot->id, img_sl.id, width, height);
+            LOG_TEE("slot %i - image id= %i loaded (%i x %i)\n", slot->id, img_sl.id, width, height);
             img_sl.img_data.nx = width;
             img_sl.img_data.ny = height;
             img_sl.img_data.size = width * height * 3;
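The reworded logs now carry the image id on both paths. The load itself requests 3 channels, so stbi_load_from_memory returns RGB pixels whatever the source format was. A minimal standalone sketch of that call (the empty buffer is a placeholder for the base64-decoded bytes):

    #define STB_IMAGE_IMPLEMENTATION
    #include "stb_image.h"

    #include <cstdio>
    #include <vector>

    int main() {
        std::vector<unsigned char> image_buffer; // base64-decoded bytes go here
        int width = 0, height = 0, channels = 0;
        // desired_channels = 3 forces RGB output; `channels` still reports
        // the source's channel count. Returns NULL on malformed input.
        unsigned char * data = stbi_load_from_memory(
            image_buffer.data(), (int) image_buffer.size(),
            &width, &height, &channels, 3);
        if (!data) {
            fprintf(stderr, "failed to load image\n");
            return 1;
        }
        printf("image loaded (%i x %i)\n", width, height);
        stbi_image_free(data);
        return 0;
    }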