From 4d0924a8902010d31bd737b6f1f594943d120d0f Mon Sep 17 00:00:00 2001 From: 0cc4m Date: Thu, 1 Feb 2024 19:25:24 +0100 Subject: [PATCH 01/94] Vulkan Phi Fix for AMD Proprietary Drivers (#5260) * Replace tanh to avoid NaN in gelu shader on AMD proprietary driver * Fix another Vulkan CPY buffer size bug --- ggml-vulkan-shaders.hpp | 132 +++++++++++++++++++----------------- ggml-vulkan.cpp | 17 +++-- ggml_vk_generate_shaders.py | 3 +- 3 files changed, 83 insertions(+), 69 deletions(-) diff --git a/ggml-vulkan-shaders.hpp b/ggml-vulkan-shaders.hpp index e2e9be22c..195410c02 100644 --- a/ggml-vulkan-shaders.hpp +++ b/ggml-vulkan-shaders.hpp @@ -14670,14 +14670,14 @@ const uint64_t f32_to_f16_fp32_len = 1596; unsigned char gelu_f32_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0x45,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, +0x4b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00, 0x47,0x4c,0x53,0x4c,0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30, 0x00,0x00,0x00,0x00,0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00, 0x01,0x00,0x00,0x00,0x0f,0x00,0x09,0x00,0x05,0x00,0x00,0x00, 0x04,0x00,0x00,0x00,0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00, 0x0b,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x24,0x00,0x00,0x00, -0x2c,0x00,0x00,0x00,0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00, +0x38,0x00,0x00,0x00,0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00, 0x11,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00, 0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00, 0x0b,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00, @@ -14696,15 +14696,15 @@ unsigned char gelu_f32_data[] = { 0x22,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, 0x24,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x47,0x00,0x04,0x00,0x24,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x29,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x35,0x00,0x00,0x00, 
0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x2a,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x2a,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x36,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00, +0x48,0x00,0x05,0x00,0x36,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x2a,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x2c,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x2c,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x42,0x00,0x00,0x00, +0x36,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, +0x38,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x38,0x00,0x00,0x00,0x21,0x00,0x00,0x00, +0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x48,0x00,0x00,0x00, 0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00, 0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00, 0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00, @@ -14731,64 +14731,70 @@ unsigned char gelu_f32_data[] = { 0x23,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x22,0x00,0x00,0x00, 0x3b,0x00,0x04,0x00,0x23,0x00,0x00,0x00,0x24,0x00,0x00,0x00, 0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x26,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, -0x29,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0x2a,0x00,0x00,0x00,0x29,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x2b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2a,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x2b,0x00,0x00,0x00,0x2c,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, +0x11,0x00,0x00,0x00,0x2a,0x00,0x00,0x00,0x2a,0x42,0x4c,0x3f, +0x2b,0x00,0x04,0x00,0x11,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, +0x00,0x00,0x80,0x3f,0x2b,0x00,0x04,0x00,0x11,0x00,0x00,0x00, +0x2e,0x00,0x00,0x00,0x13,0x27,0x37,0x3d,0x1d,0x00,0x03,0x00, +0x35,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, 
+0x36,0x00,0x00,0x00,0x35,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x37,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x36,0x00,0x00,0x00, +0x3b,0x00,0x04,0x00,0x37,0x00,0x00,0x00,0x38,0x00,0x00,0x00, 0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x11,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x3f,0x2b,0x00,0x04,0x00, -0x11,0x00,0x00,0x00,0x31,0x00,0x00,0x00,0x00,0x00,0x80,0x3f, -0x2b,0x00,0x04,0x00,0x11,0x00,0x00,0x00,0x32,0x00,0x00,0x00, -0x2a,0x42,0x4c,0x3f,0x2b,0x00,0x04,0x00,0x11,0x00,0x00,0x00, -0x35,0x00,0x00,0x00,0x13,0x27,0x37,0x3d,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x00,0x02,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2c,0x00,0x06,0x00,0x09,0x00,0x00,0x00, -0x42,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, -0x43,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00, -0x0c,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x44,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x0d,0x00,0x00,0x00, -0x0e,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, -0x0e,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x17,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0xae,0x00,0x05,0x00,0x1a,0x00,0x00,0x00, -0x1b,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0x1d,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfa,0x00,0x04,0x00,0x1b,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x1d,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x1c,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0x43,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x1d,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x26,0x00,0x00,0x00, -0x27,0x00,0x00,0x00,0x24,0x00,0x00,0x00,0x16,0x00,0x00,0x00, 
-0x0f,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x11,0x00,0x00,0x00, -0x28,0x00,0x00,0x00,0x27,0x00,0x00,0x00,0x85,0x00,0x05,0x00, +0x3a,0x00,0x00,0x00,0x00,0x00,0x00,0x3f,0x2b,0x00,0x04,0x00, +0x11,0x00,0x00,0x00,0x3d,0x00,0x00,0x00,0x00,0x00,0x00,0x40, +0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x46,0x00,0x00,0x00, +0x00,0x02,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x47,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2c,0x00,0x06,0x00, +0x09,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x46,0x00,0x00,0x00, +0x47,0x00,0x00,0x00,0x47,0x00,0x00,0x00,0x36,0x00,0x05,0x00, +0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00, +0xf7,0x00,0x03,0x00,0x49,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, +0xf8,0x00,0x02,0x00,0x4a,0x00,0x00,0x00,0x41,0x00,0x05,0x00, +0x0d,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x0f,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x41,0x00,0x05,0x00, +0x17,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x14,0x00,0x00,0x00, +0x16,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x19,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0xae,0x00,0x05,0x00, +0x1a,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, +0x19,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x1d,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x1b,0x00,0x00,0x00, +0x1c,0x00,0x00,0x00,0x1d,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, +0x1c,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x49,0x00,0x00,0x00, +0xf8,0x00,0x02,0x00,0x1d,0x00,0x00,0x00,0x41,0x00,0x06,0x00, +0x26,0x00,0x00,0x00,0x27,0x00,0x00,0x00,0x24,0x00,0x00,0x00, +0x16,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, +0x11,0x00,0x00,0x00,0x28,0x00,0x00,0x00,0x27,0x00,0x00,0x00, +0x85,0x00,0x05,0x00,0x11,0x00,0x00,0x00,0x2c,0x00,0x00,0x00, +0x2a,0x00,0x00,0x00,0x28,0x00,0x00,0x00,0x85,0x00,0x05,0x00, 0x11,0x00,0x00,0x00,0x30,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, 
+0x28,0x00,0x00,0x00,0x0c,0x00,0x08,0x00,0x11,0x00,0x00,0x00, +0x33,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x32,0x00,0x00,0x00, +0x30,0x00,0x00,0x00,0x28,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, +0x85,0x00,0x05,0x00,0x11,0x00,0x00,0x00,0x34,0x00,0x00,0x00, +0x2c,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x85,0x00,0x05,0x00, +0x11,0x00,0x00,0x00,0x3c,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, 0x28,0x00,0x00,0x00,0x85,0x00,0x05,0x00,0x11,0x00,0x00,0x00, -0x34,0x00,0x00,0x00,0x32,0x00,0x00,0x00,0x28,0x00,0x00,0x00, -0x85,0x00,0x05,0x00,0x11,0x00,0x00,0x00,0x37,0x00,0x00,0x00, -0x35,0x00,0x00,0x00,0x28,0x00,0x00,0x00,0x0c,0x00,0x08,0x00, -0x11,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x32,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x28,0x00,0x00,0x00, -0x31,0x00,0x00,0x00,0x85,0x00,0x05,0x00,0x11,0x00,0x00,0x00, -0x3b,0x00,0x00,0x00,0x34,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x0c,0x00,0x06,0x00,0x11,0x00,0x00,0x00,0x3c,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x15,0x00,0x00,0x00,0x3b,0x00,0x00,0x00, -0x81,0x00,0x05,0x00,0x11,0x00,0x00,0x00,0x3d,0x00,0x00,0x00, -0x31,0x00,0x00,0x00,0x3c,0x00,0x00,0x00,0x85,0x00,0x05,0x00, -0x11,0x00,0x00,0x00,0x3e,0x00,0x00,0x00,0x30,0x00,0x00,0x00, -0x3d,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x26,0x00,0x00,0x00, -0x3f,0x00,0x00,0x00,0x2c,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0x3f,0x00,0x00,0x00, -0x3e,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x43,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x43,0x00,0x00,0x00,0xfd,0x00,0x01,0x00, -0x38,0x00,0x01,0x00, +0x3f,0x00,0x00,0x00,0x3d,0x00,0x00,0x00,0x34,0x00,0x00,0x00, +0x0c,0x00,0x06,0x00,0x11,0x00,0x00,0x00,0x40,0x00,0x00,0x00, +0x01,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x3f,0x00,0x00,0x00, +0x81,0x00,0x05,0x00,0x11,0x00,0x00,0x00,0x41,0x00,0x00,0x00, +0x40,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x88,0x00,0x05,0x00, +0x11,0x00,0x00,0x00,0x42,0x00,0x00,0x00,0x3d,0x00,0x00,0x00, +0x41,0x00,0x00,0x00,0x83,0x00,0x05,0x00,0x11,0x00,0x00,0x00, 
+0x43,0x00,0x00,0x00,0x3d,0x00,0x00,0x00,0x42,0x00,0x00,0x00, +0x85,0x00,0x05,0x00,0x11,0x00,0x00,0x00,0x44,0x00,0x00,0x00, +0x3c,0x00,0x00,0x00,0x43,0x00,0x00,0x00,0x41,0x00,0x06,0x00, +0x26,0x00,0x00,0x00,0x45,0x00,0x00,0x00,0x38,0x00,0x00,0x00, +0x16,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, +0x45,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, +0x49,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x49,0x00,0x00,0x00, +0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, }; -const uint64_t gelu_f32_len = 1408; +const uint64_t gelu_f32_len = 1484; unsigned char get_rows_f16_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp index bccc40bf5..b1e0006bb 100644 --- a/ggml-vulkan.cpp +++ b/ggml-vulkan.cpp @@ -2876,6 +2876,9 @@ static void ggml_vk_op_f32(vk_context * ctx, const ggml_tensor * src0, const ggm x_sz = ggml_nbytes(src0); d_sz = ggml_nbytes(dst); + if (extra_src0->offset + x_sz >= d_X->size) { + x_sz = VK_WHOLE_SIZE; + } if (extra->offset + d_sz >= d_D->size) { d_sz = VK_WHOLE_SIZE; } @@ -2911,12 +2914,16 @@ static void ggml_vk_op_f32(vk_context * ctx, const ggml_tensor * src0, const ggm break; } - x_sz *= ne02 * ne03; - if (y_sz != VK_WHOLE_SIZE) { - y_sz *= ne12 * ne13; - } if (op != GGML_OP_CPY) { - d_sz *= ne02 * ne03; + if (x_sz != VK_WHOLE_SIZE) { + x_sz *= ne02 * ne03; + } + if (y_sz != VK_WHOLE_SIZE) { + y_sz *= ne12 * ne13; + } + if (d_sz != VK_WHOLE_SIZE) { + d_sz *= ne02 * ne03; + } } if (!use_src1 && op == GGML_OP_SOFT_MAX) { diff --git a/ggml_vk_generate_shaders.py b/ggml_vk_generate_shaders.py index 6b1b82bf3..67981a751 100644 --- a/ggml_vk_generate_shaders.py +++ b/ggml_vk_generate_shaders.py @@ -1689,7 +1689,8 @@ void main() { } const float xi = float(data_a[i]); - data_d[i] = D_TYPE(0.5f*xi*(1.0f + tanh(SQRT_2_OVER_PI*xi*(1.0f + GELU_COEF_A*xi*xi)))); + const float val = SQRT_2_OVER_PI*xi*(1.0f + GELU_COEF_A*xi*xi); + data_d[i] = D_TYPE(0.5f*xi*(2.0f - 2.0f / (exp(2 * val) + 
1))); } """ From 128dcbd3c9c4b12f42b560a4430427d7b2828628 Mon Sep 17 00:00:00 2001 From: Neo Zhang Jianyu Date: Fri, 2 Feb 2024 03:48:53 +0800 Subject: [PATCH 02/94] add --no-mmap in llama-bench (#5257) * add --no-mmap, show sycl backend * fix conflict * fix code format, change print for --no-mmap * ren no_mmap to mmap, show mmap when not default value in printer * update guide for mmap * mv position to reduce model reload --- README-sycl.md | 2 +- examples/llama-bench/llama-bench.cpp | 60 +++++++++++++++++++++++++--- ggml-sycl.cpp | 34 +++++++++++++++- ggml-sycl.h | 3 +- 4 files changed, 89 insertions(+), 10 deletions(-) diff --git a/README-sycl.md b/README-sycl.md index 2b2cfe03a..b8ee212b8 100644 --- a/README-sycl.md +++ b/README-sycl.md @@ -405,7 +405,7 @@ Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device llama.cpp use mmap as default way to read model file and copy to GPU. In some system, memcpy will be abnormal and block. - Solution: add **--no-mmap**. + Solution: add **--no-mmap** or **--mmap 0**. 
## Q&A diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index c5a6f744e..e36c061a2 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -20,6 +20,7 @@ #include "llama.h" #include "common.h" #include "ggml-cuda.h" +#include "ggml-sycl.h" // utils static uint64_t get_time_ns() { @@ -120,6 +121,22 @@ static std::string get_gpu_info() { id += "/"; } } +#endif +#ifdef GGML_USE_SYCL + int device_list[GGML_SYCL_MAX_DEVICES]; + ggml_sycl_get_gpu_list(device_list, GGML_SYCL_MAX_DEVICES); + + for (int i = 0; i < GGML_SYCL_MAX_DEVICES; i++) { + if (device_list[i] >0 ){ + char buf[128]; + ggml_sycl_get_device_description(i, buf, sizeof(buf)); + id += buf; + id += "/"; + } + } + if (id.length() >2 ) { + id.pop_back(); + } #endif // TODO: other backends return id; @@ -161,6 +178,7 @@ struct cmd_params { std::vector no_kv_offload; std::vector mul_mat_q; std::vector> tensor_split; + std::vector use_mmap; int reps; bool verbose; output_formats output_format; @@ -180,6 +198,7 @@ static const cmd_params cmd_params_defaults = { /* no_kv_offload */ {false}, /* mul_mat_q */ {true}, /* tensor_split */ {std::vector(llama_max_devices(), 0.0f)}, + /* use_mmap */ {true}, /* reps */ 5, /* verbose */ false, /* output_format */ MARKDOWN @@ -201,6 +220,7 @@ static void print_usage(int /* argc */, char ** argv) { printf(" -sm, --split-mode (default: %s)\n", join(transform_to_str(cmd_params_defaults.split_mode, split_mode_str), ",").c_str()); printf(" -mg, --main-gpu (default: %s)\n", join(cmd_params_defaults.main_gpu, ",").c_str()); printf(" -nkvo, --no-kv-offload <0|1> (default: %s)\n", join(cmd_params_defaults.no_kv_offload, ",").c_str()); + printf(" -mmp, --mmap <0|1> (default: %s)\n", join(cmd_params_defaults.use_mmap, ",").c_str()); printf(" -mmq, --mul-mat-q <0|1> (default: %s)\n", join(cmd_params_defaults.mul_mat_q, ",").c_str()); printf(" -ts, --tensor_split (default: 0)\n"); printf(" -r, --repetitions 
(default: %d)\n", cmd_params_defaults.reps); @@ -370,6 +390,13 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { } auto p = split(argv[i], split_delim); params.mul_mat_q.insert(params.mul_mat_q.end(), p.begin(), p.end()); + } else if (arg == "-mmp" || arg == "--mmap") { + if (++i >= argc) { + invalid_param = true; + break; + } + auto p = split(argv[i], split_delim); + params.use_mmap.insert(params.use_mmap.end(), p.begin(), p.end()); } else if (arg == "-ts" || arg == "--tensor-split") { if (++i >= argc) { invalid_param = true; @@ -441,6 +468,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { if (params.no_kv_offload.empty()){ params.no_kv_offload = cmd_params_defaults.no_kv_offload; } if (params.mul_mat_q.empty()) { params.mul_mat_q = cmd_params_defaults.mul_mat_q; } if (params.tensor_split.empty()) { params.tensor_split = cmd_params_defaults.tensor_split; } + if (params.use_mmap.empty()) { params.use_mmap = cmd_params_defaults.use_mmap; } if (params.n_threads.empty()) { params.n_threads = cmd_params_defaults.n_threads; } return params; @@ -460,6 +488,7 @@ struct cmd_params_instance { bool no_kv_offload; bool mul_mat_q; std::vector tensor_split; + bool use_mmap; llama_model_params to_llama_mparams() const { llama_model_params mparams = llama_model_default_params(); @@ -468,6 +497,7 @@ struct cmd_params_instance { mparams.split_mode = split_mode; mparams.main_gpu = main_gpu; mparams.tensor_split = tensor_split.data(); + mparams.use_mmap = use_mmap; return mparams; } @@ -477,6 +507,7 @@ struct cmd_params_instance { n_gpu_layers == other.n_gpu_layers && split_mode == other.split_mode && main_gpu == other.main_gpu && + use_mmap == other.use_mmap && tensor_split == other.tensor_split; } @@ -503,6 +534,7 @@ static std::vector get_cmd_params_instances(const cmd_param for (const auto & sm : params.split_mode) for (const auto & mg : params.main_gpu) for (const auto & ts : params.tensor_split) + for (const auto & mmp : params.use_mmap) for (const 
auto & nb : params.n_batch) for (const auto & tk : params.type_k) for (const auto & tv : params.type_v) @@ -527,6 +559,7 @@ static std::vector get_cmd_params_instances(const cmd_param /* .no_kv_offload= */ nkvo, /* .mul_mat_q = */ mmq, /* .tensor_split = */ ts, + /* .use_mmap = */ mmp, }; instances.push_back(instance); } @@ -549,6 +582,7 @@ static std::vector get_cmd_params_instances(const cmd_param /* .no_kv_offload= */ nkvo, /* .mul_mat_q = */ mmq, /* .tensor_split = */ ts, + /* .use_mmap = */ mmp, }; instances.push_back(instance); } @@ -565,6 +599,7 @@ struct test { static const bool vulkan; static const bool kompute; static const bool metal; + static const bool sycl; static const bool gpu_blas; static const bool blas; static const std::string cpu_info; @@ -583,6 +618,7 @@ struct test { bool no_kv_offload; bool mul_mat_q; std::vector tensor_split; + bool use_mmap; int n_prompt; int n_gen; std::string test_time; @@ -605,6 +641,7 @@ struct test { no_kv_offload = inst.no_kv_offload; mul_mat_q = inst.mul_mat_q; tensor_split = inst.tensor_split; + use_mmap = inst.use_mmap; n_prompt = inst.n_prompt; n_gen = inst.n_gen; // RFC 3339 date-time format @@ -654,25 +691,29 @@ struct test { if (metal) { return "Metal"; } + if (sycl) { + return GGML_SYCL_NAME; + } if (gpu_blas) { return "GPU BLAS"; } if (blas) { return "BLAS"; } + return "CPU"; } static const std::vector & get_fields() { static const std::vector fields = { "build_commit", "build_number", - "cuda", "opencl", "vulkan", "kompute", "metal", "gpu_blas", "blas", + "cuda", "opencl", "vulkan", "kompute", "metal", "sycl", "gpu_blas", "blas", "cpu_info", "gpu_info", "model_filename", "model_type", "model_size", "model_n_params", "n_batch", "n_threads", "type_k", "type_v", "n_gpu_layers", "split_mode", "main_gpu", "no_kv_offload", - "mul_mat_q", "tensor_split", + "mul_mat_q", "tensor_split", "use_mmap", "n_prompt", "n_gen", "test_time", "avg_ns", "stddev_ns", "avg_ts", "stddev_ts" @@ -691,8 +732,8 @@ struct test { return 
INT; } if (field == "cuda" || field == "opencl" || field == "vulkan" || field == "kompute" || field == "metal" || - field == "gpu_blas" || field == "blas" || field == "f16_kv" || field == "no_kv_offload" || - field == "mul_mat_q") { + field == "gpu_blas" || field == "blas" || field == "sycl" ||field == "f16_kv" || field == "no_kv_offload" || + field == "mul_mat_q" || field == "use_mmap") { return BOOL; } if (field == "avg_ts" || field == "stddev_ts") { @@ -720,13 +761,13 @@ struct test { std::vector values = { build_commit, std::to_string(build_number), std::to_string(cuda), std::to_string(opencl), std::to_string(vulkan), std::to_string(vulkan), - std::to_string(metal), std::to_string(gpu_blas), std::to_string(blas), + std::to_string(metal), std::to_string(sycl), std::to_string(gpu_blas), std::to_string(blas), cpu_info, gpu_info, model_filename, model_type, std::to_string(model_size), std::to_string(model_n_params), std::to_string(n_batch), std::to_string(n_threads), ggml_type_name(type_k), ggml_type_name(type_v), std::to_string(n_gpu_layers), split_mode_str(split_mode), std::to_string(main_gpu), std::to_string(no_kv_offload), - std::to_string(mul_mat_q), tensor_split_str, + std::to_string(mul_mat_q), tensor_split_str, std::to_string(use_mmap), std::to_string(n_prompt), std::to_string(n_gen), test_time, std::to_string(avg_ns()), std::to_string(stdev_ns()), std::to_string(avg_ts()), std::to_string(stdev_ts()) @@ -753,6 +794,7 @@ const bool test::kompute = !!ggml_cpu_has_kompute(); const bool test::metal = !!ggml_cpu_has_metal(); const bool test::gpu_blas = !!ggml_cpu_has_gpublas(); const bool test::blas = !!ggml_cpu_has_blas(); +const bool test::sycl = !!ggml_cpu_has_sycl(); const std::string test::cpu_info = get_cpu_info(); const std::string test::gpu_info = get_gpu_info(); @@ -895,6 +937,9 @@ struct markdown_printer : public printer { if (field == "no_kv_offload") { return "nkvo"; } + if (field == "use_mmap") { + return "mmap"; + } if (field == "tensor_split") { 
return "ts"; } @@ -938,6 +983,9 @@ struct markdown_printer : public printer { if (params.tensor_split.size() > 1 || params.tensor_split != cmd_params_defaults.tensor_split) { fields.push_back("tensor_split"); } + if (params.use_mmap.size() > 1 || params.use_mmap != cmd_params_defaults.use_mmap) { + fields.push_back("use_mmap"); + } fields.push_back("test"); fields.push_back("t/s"); diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index 1cc55ef52..e8ba48353 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -2928,7 +2928,6 @@ void ggml_sycl_set_main_device(int main_device); void ggml_sycl_set_mul_mat_q(bool mul_mat_q); void ggml_sycl_set_scratch_size(size_t scratch_size); void ggml_sycl_free_scratch(void); -int ggml_sycl_get_device_count(void); void ggml_sycl_get_device_description(int device, char * description, size_t description_size); bool ggml_backend_is_sycl(ggml_backend_t backend); int ggml_backend_sycl_get_device(ggml_backend_t backend); @@ -14493,6 +14492,37 @@ bool ggml_sycl_compute_forward(struct ggml_compute_params * params, struct ggml_ return true; } +GGML_API GGML_CALL void ggml_sycl_get_gpu_list(int *id_list, int max_len) try { + int max_compute_units = -1; + for(int i=0;i Date: Thu, 1 Feb 2024 23:20:13 -0800 Subject: [PATCH 03/94] llama : fix memory leak in llama_batch_free (#5252) The llama_batch_init allocates memory for a fixed number of tokens. However, the llama_batch_free only frees memory for the number of tokens that were added to the batch. This change-set uses a null terminated array for the batch seq_id, and frees all the elements until the nullptr is reached. This change-set also changes the name of the first parameter from `n_tokens` to `n_tokens_alloc` to more clearly indicate that this value is the number of tokens allocated to the batch, not the number of tokens in the batch. 
--- llama.cpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/llama.cpp b/llama.cpp index e8f44c2cb..6bf7f9efb 100644 --- a/llama.cpp +++ b/llama.cpp @@ -11377,22 +11377,24 @@ struct llama_batch llama_batch_get_one( }; } -struct llama_batch llama_batch_init(int32_t n_tokens, int32_t embd, int32_t n_seq_max) { +struct llama_batch llama_batch_init(int32_t n_tokens_alloc, int32_t embd, int32_t n_seq_max) { llama_batch batch = { 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, 0, 0, 0, }; if (embd) { - batch.embd = (float *) malloc(sizeof(float) * n_tokens * embd); + batch.embd = (float *) malloc(sizeof(float) * n_tokens_alloc * embd); } else { - batch.token = (llama_token *) malloc(sizeof(llama_token) * n_tokens); + batch.token = (llama_token *) malloc(sizeof(llama_token) * n_tokens_alloc); } - batch.pos = (llama_pos *) malloc(sizeof(llama_pos) * n_tokens); - batch.n_seq_id = (int32_t *) malloc(sizeof(int32_t) * n_tokens); - batch.seq_id = (llama_seq_id **) malloc(sizeof(llama_seq_id *) * n_tokens); - for (int i = 0; i < n_tokens; ++i) { + batch.pos = (llama_pos *) malloc(sizeof(llama_pos) * n_tokens_alloc); + batch.n_seq_id = (int32_t *) malloc(sizeof(int32_t) * n_tokens_alloc); + batch.seq_id = (llama_seq_id **) malloc(sizeof(llama_seq_id *) * (n_tokens_alloc + 1)); + for (int i = 0; i < n_tokens_alloc; ++i) { batch.seq_id[i] = (llama_seq_id *) malloc(sizeof(llama_seq_id) * n_seq_max); } - batch.logits = (int8_t *) malloc(sizeof(int8_t) * n_tokens); + batch.seq_id[n_tokens_alloc] = nullptr; + + batch.logits = (int8_t *) malloc(sizeof(int8_t) * n_tokens_alloc); return batch; } @@ -11403,7 +11405,7 @@ void llama_batch_free(struct llama_batch batch) { if (batch.pos) free(batch.pos); if (batch.n_seq_id) free(batch.n_seq_id); if (batch.seq_id) { - for (int i = 0; i < batch.n_tokens; ++i) { + for (int i = 0; batch.seq_id[i] != nullptr; ++i) { free(batch.seq_id[i]); } free(batch.seq_id); From 
af3ba5d94627d337e32a95129e31a3064c459f6b Mon Sep 17 00:00:00 2001 From: Neo Zhang Jianyu Date: Fri, 2 Feb 2024 15:53:27 +0800 Subject: [PATCH 04/94] [SYCL] update guide of SYCL backend (#5254) * update guide for make installation, memory, gguf model link, rm todo for windows build * add vs install requirement * update for gpu device check * update help of llama-bench * fix grammer issues --- README-sycl.md | 64 +++++++++++++++++++++++++++----- examples/llama-bench/README.md | 34 ++++++++++------- examples/sycl/win-run-llama2.bat | 2 +- 3 files changed, 77 insertions(+), 23 deletions(-) diff --git a/README-sycl.md b/README-sycl.md index b8ee212b8..f7edc1c3e 100644 --- a/README-sycl.md +++ b/README-sycl.md @@ -42,6 +42,8 @@ For Intel CPU, recommend to use llama.cpp for X86 (Intel MKL building). ## Intel GPU +### Verified + |Intel GPU| Status | Verified Model| |-|-|-| |Intel Data Center Max Series| Support| Max 1550| @@ -50,6 +52,17 @@ For Intel CPU, recommend to use llama.cpp for X86 (Intel MKL building). |Intel built-in Arc GPU| Support| built-in Arc GPU in Meteor Lake| |Intel iGPU| Support| iGPU in i5-1250P, i7-1165G7| +Note: If the EUs (Execution Unit) in iGPU is less than 80, the inference speed will be too slow to use. + +### Memory + +The memory is a limitation to run LLM on GPUs. + +When run llama.cpp, there is print log to show the applied memory on GPU. You could know how much memory to be used in your case. Like `llm_load_tensors: buffer size = 3577.56 MiB`. + +For iGPU, please make sure the shared memory from host memory is enough. For llama-2-7b.Q4_0, recommend the host memory is 8GB+. + +For dGPU, please make sure the device memory is enough. For llama-2-7b.Q4_0, recommend the device memory is 4GB+. ## Linux @@ -105,7 +118,7 @@ source /opt/intel/oneapi/setvars.sh sycl-ls ``` -There should be one or more level-zero devices. Like **[ext_oneapi_level_zero:gpu:0]**. +There should be one or more level-zero devices. 
Please confirm that at least one GPU is present, like **[ext_oneapi_level_zero:gpu:0]**. Output (example): ``` @@ -152,6 +165,8 @@ Note: 1. Put model file to folder **models** +You could download [llama-2-7b.Q4_0.gguf](https://huggingface.co/TheBloke/Llama-2-7B-GGUF/blob/main/llama-2-7b.Q4_0.gguf) as example. + 2. Enable oneAPI running environment ``` @@ -223,7 +238,13 @@ Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device Please install Intel GPU driver by official guide: [Install GPU Drivers](https://www.intel.com/content/www/us/en/products/docs/discrete-gpus/arc/software/drivers.html). -2. Install Intel® oneAPI Base toolkit. +Note: **The driver is mandatory for compute function**. + +2. Install Visual Studio. + +Please install [Visual Studio](https://visualstudio.microsoft.com/) which impact oneAPI environment enabling in Windows. + +3. Install Intel® oneAPI Base toolkit. a. Please follow the procedure in [Get the Intel® oneAPI Base Toolkit ](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html). @@ -252,7 +273,7 @@ In oneAPI command line: sycl-ls ``` -There should be one or more level-zero devices. Like **[ext_oneapi_level_zero:gpu:0]**. +There should be one or more level-zero devices. Please confirm that at least one GPU is present, like **[ext_oneapi_level_zero:gpu:0]**. Output (example): ``` @@ -260,15 +281,21 @@ Output (example): [opencl:cpu:1] Intel(R) OpenCL, 11th Gen Intel(R) Core(TM) i7-1185G7 @ 3.00GHz OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000] [opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Iris(R) Xe Graphics OpenCL 3.0 NEO [31.0.101.5186] [ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Iris(R) Xe Graphics 1.3 [1.3.28044] - ``` -3. Install cmake & make +4. Install cmake & make -a. Download & install cmake for windows: https://cmake.org/download/ +a. Download & install cmake for Windows: https://cmake.org/download/ -b. 
Download & install make for windows provided by mingw-w64: https://www.mingw-w64.org/downloads/ +b. Download & install make for Windows provided by mingw-w64 +- Download binary package for Windows in https://github.com/niXman/mingw-builds-binaries/releases. + + Like [x86_64-13.2.0-release-win32-seh-msvcrt-rt_v11-rev1.7z](https://github.com/niXman/mingw-builds-binaries/releases/download/13.2.0-rt_v11-rev1/x86_64-13.2.0-release-win32-seh-msvcrt-rt_v11-rev1.7z). + +- Unzip the binary package. In the **bin** sub-folder and rename **xxx-make.exe** to **make.exe**. + +- Add the **bin** folder path in the Windows system PATH environment. ### Build locally: @@ -309,6 +336,8 @@ Note: 1. Put model file to folder **models** +You could download [llama-2-7b.Q4_0.gguf](https://huggingface.co/TheBloke/Llama-2-7B-GGUF/blob/main/llama-2-7b.Q4_0.gguf) as example. + 2. Enable oneAPI running environment - In Search, input 'oneAPI'. @@ -419,8 +448,25 @@ Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device Miss to enable oneAPI running environment. +- Meet compile error. + + Remove folder **build** and try again. + +- I can **not** see **[ext_oneapi_level_zero:gpu:0]** afer install GPU driver in Linux. + + Please run **sudo sycl-ls**. + + If you see it in result, please add video/render group to your ID: + + ``` + sudo usermod -aG render username + sudo usermod -aG video username + ``` + + Then **relogin**. + + If you do not see it, please check the installation GPU steps again. + ## Todo -- Support to build in Windows. - - Support multiple cards. 
diff --git a/examples/llama-bench/README.md b/examples/llama-bench/README.md index d02824bfa..374e40a7d 100644 --- a/examples/llama-bench/README.md +++ b/examples/llama-bench/README.md @@ -23,19 +23,23 @@ usage: ./llama-bench [options] options: -h, --help - -m, --model (default: models/7B/ggml-model-q4_0.gguf) - -p, --n-prompt (default: 512) - -n, --n-gen (default: 128) - -b, --batch-size (default: 512) - --memory-f32 <0|1> (default: 0) - -t, --threads (default: 16) - -ngl N, --n-gpu-layers (default: 99) - -mg i, --main-gpu (default: 0) - -mmq, --mul-mat-q <0|1> (default: 1) - -ts, --tensor_split - -r, --repetitions (default: 5) - -o, --output (default: md) - -v, --verbose (default: 0) + -m, --model (default: models/7B/ggml-model-q4_0.gguf) + -p, --n-prompt (default: 512) + -n, --n-gen (default: 128) + -b, --batch-size (default: 512) + -ctk , --cache-type-k (default: f16) + -ctv , --cache-type-v (default: f16) + -t, --threads (default: 112) + -ngl, --n-gpu-layers (default: 99) + -sm, --split-mode (default: layer) + -mg, --main-gpu (default: 0) + -nkvo, --no-kv-offload <0|1> (default: 0) + -mmp, --mmap <0|1> (default: 1) + -mmq, --mul-mat-q <0|1> (default: 1) + -ts, --tensor_split (default: 0) + -r, --repetitions (default: 5) + -o, --output (default: md) + -v, --verbose (default: 0) Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times. ``` @@ -51,6 +55,10 @@ Each test is repeated the number of times given by `-r`, and the results are ave For a description of the other options, see the [main example](../main/README.md). +Note: + +- When using SYCL backend, there would be hang issue in some cases. Please set `--mmp 0`. 
+ ## Examples ### Text generation with different models diff --git a/examples/sycl/win-run-llama2.bat b/examples/sycl/win-run-llama2.bat index 28d935541..cf621c675 100644 --- a/examples/sycl/win-run-llama2.bat +++ b/examples/sycl/win-run-llama2.bat @@ -2,7 +2,7 @@ :: Copyright (C) 2024 Intel Corporation :: SPDX-License-Identifier: MIT -INPUT2="Building a website can be done in 10 simple steps:\nStep 1:" +set INPUT2="Building a website can be done in 10 simple steps:\nStep 1:" @call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force From e805f0fa9951081ce0a86378a7aa52b6f636b82d Mon Sep 17 00:00:00 2001 From: "Meng, Hengyu" Date: Fri, 2 Feb 2024 15:54:14 +0800 Subject: [PATCH 05/94] [SYCL] get MAX_MEM_ALLOC from device property (#5270) * get max alloc size from device prop * fix macro typo --- ggml-sycl.cpp | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index e8ba48353..4ee2eed38 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -337,6 +337,7 @@ namespace dpct } size_t get_global_mem_size() const { return _global_mem_size; } size_t get_local_mem_size() const { return _local_mem_size; } + size_t get_max_mem_alloc_size() const { return _max_mem_alloc_size; } /// Returns the maximum clock rate of device's global memory in kHz. If /// compiler does not support this API then returns default value 3200000 kHz. 
unsigned int get_memory_clock_rate() const { return _memory_clock_rate; } @@ -398,6 +399,10 @@ namespace dpct { _local_mem_size = local_mem_size; } + void set_max_mem_alloc_size(size_t max_mem_alloc_size) + { + _max_mem_alloc_size = max_mem_alloc_size; + } void set_max_work_group_size(int max_work_group_size) { _max_work_group_size = max_work_group_size; @@ -465,6 +470,7 @@ namespace dpct int _max_register_size_per_work_group; size_t _global_mem_size; size_t _local_mem_size; + size_t _max_mem_alloc_size; size_t _max_nd_range_size[3]; int _max_nd_range_size_i[3]; uint32_t _device_id; @@ -516,6 +522,7 @@ namespace dpct dev.get_info()); prop.set_global_mem_size(dev.get_info()); prop.set_local_mem_size(dev.get_info()); + prop.set_max_mem_alloc_size(dev.get_info()); #if (defined(SYCL_EXT_INTEL_DEVICE_INFO) && SYCL_EXT_INTEL_DEVICE_INFO >= 6) if (dev.has(sycl::aspect::ext_intel_memory_clock_rate)) @@ -644,6 +651,11 @@ namespace dpct return get_device_info().get_global_mem_size(); } + size_t get_max_mem_alloc_size() const + { + return get_device_info().get_max_mem_alloc_size(); + } + /// Get the number of bytes of free and total memory on the SYCL device. /// \param [out] free_memory The number of bytes of free memory on the SYCL device. /// \param [out] total_memory The number of bytes of total memory on the SYCL device. 
@@ -11311,10 +11323,10 @@ void ggml_init_sycl() try { GGML_ASSERT(g_all_sycl_device_count <= GGML_SYCL_MAX_DEVICES); int64_t total_vram = 0; -#if defined(GGML_SYCL_FP16) - fprintf(stderr, "%s: GGML_SYCL_FP16: yes\n", __func__); +#if defined(GGML_SYCL_F16) + fprintf(stderr, "%s: GGML_SYCL_F16: yes\n", __func__); #else - fprintf(stderr, "%s: GGML_SYCL_FP16: no\n", __func__); + fprintf(stderr, "%s: GGML_SYCL_F16: no\n", __func__); #endif @@ -14788,6 +14800,12 @@ static size_t ggml_backend_sycl_buffer_type_get_alignment(ggml_backend_buffer_ty UNUSED(buft); } +static size_t ggml_backend_sycl_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) { + return dpct::get_current_device().get_max_mem_alloc_size(); + + UNUSED(buft); +} + static size_t ggml_backend_sycl_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) { int64_t row_low = 0; int64_t row_high = ggml_nrows(tensor); @@ -14818,7 +14836,7 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = { /* .get_name = */ ggml_backend_sycl_buffer_type_name, /* .alloc_buffer = */ ggml_backend_sycl_buffer_type_alloc_buffer, /* .get_alignment = */ ggml_backend_sycl_buffer_type_get_alignment, - /* .get_max_size = */ NULL, // TODO: return device.maxBufferLength + /* .get_max_size = */ ggml_backend_sycl_buffer_type_get_max_size, /* .get_alloc_size = */ ggml_backend_sycl_buffer_type_get_alloc_size, /* .supports_backend = */ ggml_backend_sycl_buffer_type_supports_backend, /* .is_host = */ nullptr, From 6b91b1e0a92ac2e4e269eec6361ca53a61ced6c6 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Fri, 2 Feb 2024 08:56:31 +0100 Subject: [PATCH 06/94] docker : add build for SYCL, Vulkan + update readme (#5228) * add vulkan dockerfile * intel dockerfile: compile sycl by default * fix vulkan dockerfile * add docs for vulkan * docs: sycl build in docker * docs: remove trailing spaces * docs: sycl: add docker section * docs: clarify install vulkan SDK outside docker * sycl: use 
intel/oneapi-basekit docker image * docs: correct TOC * docs: correct docker image for Intel oneMKL --- .devops/main-intel.Dockerfile | 16 ++--- .devops/main-vulkan.Dockerfile | 29 +++++++++ .devops/server-intel.Dockerfile | 15 +++-- .devops/server-vulkan.Dockerfile | 29 +++++++++ README-sycl.md | 102 +++++++++++++++++++------------ README.md | 64 ++++++++++++++----- 6 files changed, 188 insertions(+), 67 deletions(-) create mode 100644 .devops/main-vulkan.Dockerfile create mode 100644 .devops/server-vulkan.Dockerfile diff --git a/.devops/main-intel.Dockerfile b/.devops/main-intel.Dockerfile index e1e6acc24..572e5d8ea 100644 --- a/.devops/main-intel.Dockerfile +++ b/.devops/main-intel.Dockerfile @@ -1,8 +1,8 @@ ARG ONEAPI_VERSION=2024.0.1-devel-ubuntu22.04 -ARG UBUNTU_VERSION=22.04 -FROM intel/hpckit:$ONEAPI_VERSION as build +FROM intel/oneapi-basekit:$ONEAPI_VERSION as build +ARG LLAMA_SYCL_F16=OFF RUN apt-get update && \ apt-get install -y git @@ -10,16 +10,18 @@ WORKDIR /app COPY . . -# for some reasons, "-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DLLAMA_NATIVE=ON" give worse performance RUN mkdir build && \ cd build && \ - cmake .. -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx && \ - cmake --build . --config Release --target main server + if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \ + echo "LLAMA_SYCL_F16 is set" && \ + export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \ + fi && \ + cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \ + cmake --build . 
--config Release --target main -FROM ubuntu:$UBUNTU_VERSION as runtime +FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime COPY --from=build /app/build/bin/main /main -COPY --from=build /app/build/bin/server /server ENV LC_ALL=C.utf8 diff --git a/.devops/main-vulkan.Dockerfile b/.devops/main-vulkan.Dockerfile new file mode 100644 index 000000000..bca460365 --- /dev/null +++ b/.devops/main-vulkan.Dockerfile @@ -0,0 +1,29 @@ +ARG UBUNTU_VERSION=jammy + +FROM ubuntu:$UBUNTU_VERSION as build + +# Install build tools +RUN apt update && apt install -y git build-essential cmake wget + +# Install Vulkan SDK +RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \ + wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \ + apt update -y && \ + apt-get install -y vulkan-sdk + +# Build it +WORKDIR /app +COPY . . +RUN mkdir build && \ + cd build && \ + cmake .. -DLLAMA_VULKAN=1 && \ + cmake --build . --config Release --target main + +# Clean up +WORKDIR / +RUN cp /app/build/bin/main /main && \ + rm -rf /app + +ENV LC_ALL=C.utf8 + +ENTRYPOINT [ "/main" ] diff --git a/.devops/server-intel.Dockerfile b/.devops/server-intel.Dockerfile index e343d278c..312f2df80 100644 --- a/.devops/server-intel.Dockerfile +++ b/.devops/server-intel.Dockerfile @@ -1,8 +1,8 @@ ARG ONEAPI_VERSION=2024.0.1-devel-ubuntu22.04 -ARG UBUNTU_VERSION=22.04 -FROM intel/hpckit:$ONEAPI_VERSION as build +FROM intel/oneapi-basekit:$ONEAPI_VERSION as build +ARG LLAMA_SYCL_F16=OFF RUN apt-get update && \ apt-get install -y git @@ -10,13 +10,16 @@ WORKDIR /app COPY . . -# for some reasons, "-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DLLAMA_NATIVE=ON" give worse performance RUN mkdir build && \ cd build && \ - cmake .. -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx && \ - cmake --build . 
--config Release --target main server + if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \ + echo "LLAMA_SYCL_F16 is set" && \ + export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \ + fi && \ + cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \ + cmake --build . --config Release --target server -FROM ubuntu:$UBUNTU_VERSION as runtime +FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime COPY --from=build /app/build/bin/server /server diff --git a/.devops/server-vulkan.Dockerfile b/.devops/server-vulkan.Dockerfile new file mode 100644 index 000000000..e0add6fc3 --- /dev/null +++ b/.devops/server-vulkan.Dockerfile @@ -0,0 +1,29 @@ +ARG UBUNTU_VERSION=jammy + +FROM ubuntu:$UBUNTU_VERSION as build + +# Install build tools +RUN apt update && apt install -y git build-essential cmake wget + +# Install Vulkan SDK +RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \ + wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \ + apt update -y && \ + apt-get install -y vulkan-sdk + +# Build it +WORKDIR /app +COPY . . +RUN mkdir build && \ + cd build && \ + cmake .. -DLLAMA_VULKAN=1 && \ + cmake --build . 
--config Release --target server + +# Clean up +WORKDIR / +RUN cp /app/build/bin/server /server && \ + rm -rf /app + +ENV LC_ALL=C.utf8 + +ENTRYPOINT [ "/server" ] diff --git a/README-sycl.md b/README-sycl.md index f7edc1c3e..7aa4274a9 100644 --- a/README-sycl.md +++ b/README-sycl.md @@ -1,22 +1,15 @@ # llama.cpp for SYCL -[Background](#background) - -[OS](#os) - -[Intel GPU](#intel-gpu) - -[Linux](#linux) - -[Windows](#windows) - -[Environment Variable](#environment-variable) - -[Known Issue](#known-issue) - -[Q&A](#q&a) - -[Todo](#todo) +- [Background](#background) +- [OS](#os) +- [Intel GPU](#intel-gpu) +- [Docker](#docker) +- [Linux](#linux) +- [Windows](#windows) +- [Environment Variable](#environment-variable) +- [Known Issue](#known-issue) +- [Q&A](#q&a) +- [Todo](#todo) ## Background @@ -36,7 +29,7 @@ For Intel CPU, recommend to use llama.cpp for X86 (Intel MKL building). |OS|Status|Verified| |-|-|-| -|Linux|Support|Ubuntu 22.04| +|Linux|Support|Ubuntu 22.04, Fedora Silverblue 39| |Windows|Support|Windows 11| @@ -50,7 +43,7 @@ For Intel CPU, recommend to use llama.cpp for X86 (Intel MKL building). |Intel Data Center Flex Series| Support| Flex 170| |Intel Arc Series| Support| Arc 770, 730M| |Intel built-in Arc GPU| Support| built-in Arc GPU in Meteor Lake| -|Intel iGPU| Support| iGPU in i5-1250P, i7-1165G7| +|Intel iGPU| Support| iGPU in i5-1250P, i7-1260P, i7-1165G7| Note: If the EUs (Execution Unit) in iGPU is less than 80, the inference speed will be too slow to use. @@ -64,6 +57,38 @@ For iGPU, please make sure the shared memory from host memory is enough. For lla For dGPU, please make sure the device memory is enough. For llama-2-7b.Q4_0, recommend the device memory is 4GB+. +## Docker + +Note: +- Only docker on Linux is tested. Docker on WSL may not work. +- You may need to install Intel GPU driver on the host machine (See the [Linux](#linux) section to know how to do that) + +### Build the image + +You can choose between **F16** and **F32** build. 
F16 is faster for long-prompt inference. + + +```sh +# For F16: +#docker build -t llama-cpp-sycl --build-arg="LLAMA_SYCL_F16=ON" -f .devops/main-intel.Dockerfile . + +# Or, for F32: +docker build -t llama-cpp-sycl -f .devops/main-intel.Dockerfile . + +# Note: you can also use the ".devops/main-server.Dockerfile", which compiles the "server" example +``` + +### Run + +```sh +# Firstly, find all the DRI cards: +ls -la /dev/dri +# Then, pick the card that you want to use. + +# For example with "/dev/dri/card1" +docker run -it --rm -v "$(pwd):/app:Z" --device /dev/dri/renderD128:/dev/dri/renderD128 --device /dev/dri/card1:/dev/dri/card1 llama-cpp-sycl -m "/app/models/YOUR_MODEL_FILE" -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 +``` + ## Linux ### Setup Environment @@ -76,7 +101,7 @@ Note: for iGPU, please install the client GPU driver. b. Add user to group: video, render. -``` +```sh sudo usermod -aG render username sudo usermod -aG video username ``` @@ -85,7 +110,7 @@ Note: re-login to enable it. c. Check -``` +```sh sudo apt install clinfo sudo clinfo -l ``` @@ -103,7 +128,6 @@ Platform #0: Intel(R) OpenCL HD Graphics 2. Install Intel® oneAPI Base toolkit. - a. Please follow the procedure in [Get the Intel® oneAPI Base Toolkit ](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html). Recommend to install to default folder: **/opt/intel/oneapi**. @@ -112,7 +136,7 @@ Following guide use the default folder as example. If you use other folder, plea b. Check -``` +```sh source /opt/intel/oneapi/setvars.sh sycl-ls @@ -131,21 +155,25 @@ Output (example): 2. Build locally: -``` +Note: +- You can choose between **F16** and **F32** build. F16 is faster for long-prompt inference. +- By default, it will build for all binary files. It will take more time. To reduce the time, we recommend to build for **example/main** only. + +```sh mkdir -p build cd build source /opt/intel/oneapi/setvars.sh -#for FP16 -#cmake .. 
-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON # faster for long-prompt inference +# For FP16: +#cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON -#for FP32 +# Or, for FP32: cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -#build example/main only +# Build example/main only #cmake --build . --config Release --target main -#build all binary +# Or, build all binary cmake --build . --config Release -v cd .. @@ -153,14 +181,10 @@ cd .. or -``` +```sh ./examples/sycl/build.sh ``` -Note: - -- By default, it will build for all binary files. It will take more time. To reduce the time, we recommend to build for **example/main** only. - ### Run 1. Put model file to folder **models** @@ -177,10 +201,10 @@ source /opt/intel/oneapi/setvars.sh Run without parameter: -``` +```sh ./build/bin/ls-sycl-device -or +# or running the "main" executable and look at the output log: ./build/bin/main ``` @@ -209,13 +233,13 @@ found 4 SYCL devices: Set device ID = 0 by **GGML_SYCL_DEVICE=0** -``` +```sh GGML_SYCL_DEVICE=0 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 ``` or run by script: -``` -./examples/sycl/run-llama2.sh +```sh +./examples/sycl/run_llama2.sh ``` Note: diff --git a/README.md b/README.md index e6ed1d429..af1f09fa0 100644 --- a/README.md +++ b/README.md @@ -393,28 +393,28 @@ Building the program with BLAS support may lead to some performance improvements Check [BLIS.md](docs/BLIS.md) for more information. +- #### SYCL + SYCL is a higher-level programming model to improve programming productivity on various hardware accelerators. + + llama.cpp based on SYCL is used to **support Intel GPU** (Data Center Max series, Flex series, Arc series, Built-in GPU and iGPU). + + For detailed info, please refer to [llama.cpp for SYCL](README-sycl.md). 
+ - #### Intel oneMKL + Building through oneAPI compilers will make avx_vnni instruction set available for intel processors that do not support avx512 and avx512_vnni. Please note that this build config **does not support Intel GPU**. For Intel GPU support, please refer to [llama.cpp for SYCL](./README-sycl.md). + - Using manual oneAPI installation: By default, `LLAMA_BLAS_VENDOR` is set to `Generic`, so if you already sourced intel environment script and assign `-DLLAMA_BLAS=ON` in cmake, the mkl version of Blas will automatically been selected. Otherwise please install oneAPI and follow the below steps: ```bash mkdir build cd build - source /opt/intel/oneapi/setvars.sh # You can skip this step if in oneapi-runtime docker image, only required for manual installation + source /opt/intel/oneapi/setvars.sh # You can skip this step if in oneapi-basekit docker image, only required for manual installation cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_NATIVE=ON cmake --build . --config Release ``` - Using oneAPI docker image: - If you do not want to source the environment vars and install oneAPI manually, you can also build the code using intel docker container: [oneAPI-runtime](https://hub.docker.com/r/intel/oneapi-runtime) - - ```bash - mkdir build - cd build - cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_NATIVE=ON - cmake --build . --config Release - ``` - - Building through oneAPI compilers will make avx_vnni instruction set available for intel processors that do not support avx512 and avx512_vnni. + If you do not want to source the environment vars and install oneAPI manually, you can also build the code using intel docker container: [oneAPI-basekit](https://hub.docker.com/r/intel/oneapi-basekit). Then, you can use the commands given above. 
Check [Optimizing and Running LLaMA2 on Intel® CPU](https://www.intel.com/content/www/us/en/content-details/791610/optimizing-and-running-llama2-on-intel-cpu.html) for more information. @@ -601,14 +601,48 @@ Building the program with BLAS support may lead to some performance improvements You can get a list of platforms and devices from the `clinfo -l` command, etc. -- #### SYCL +- #### Vulkan - SYCL is a higher-level programming model to improve programming productivity on various hardware accelerators. + **With docker**: - llama.cpp based on SYCL is used to support Intel GPU (Data Center Max series, Flex series, Arc series, Built-in GPU and iGPU). + You don't need to install Vulkan SDK. It will be installed inside the container. - For detailed info, please refer to [llama.cpp for SYCL](README-sycl.md). + ```sh + # Build the image + docker build -t llama-cpp-vulkan -f .devops/main-vulkan.Dockerfile . + # Then, use it: + docker run -it --rm -v "$(pwd):/app:Z" --device /dev/dri/renderD128:/dev/dri/renderD128 --device /dev/dri/card1:/dev/dri/card1 llama-cpp-vulkan -m "/app/models/YOUR_MODEL_FILE" -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 + ``` + + **Without docker**: + + Firstly, you need to make sure you installed [Vulkan SDK](https://vulkan.lunarg.com/doc/view/latest/linux/getting_started_ubuntu.html) + + For example, on Ubuntu 22.04 (jammy), use the command below: + + ```bash + wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - + wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list + apt update -y + apt-get install -y vulkan-sdk + # To verify the installation, use the command below: + vulkaninfo + ``` + + Then, build llama.cpp using the cmake command below: + + ```bash + mkdir -p build + cd build + cmake .. -DLLAMA_VULKAN=1 + cmake --build . 
--config Release + # Test the output binary (with "-ngl 33" to offload all layers to GPU) + ./bin/main -m "PATH_TO_MODEL" -p "Hi you how are you" -n 50 -e -ngl 33 -t 4 + + # You should see in the output, ggml_vulkan detected your GPU. For example: + # ggml_vulkan: Using Intel(R) Graphics (ADL GT2) | uma: 1 | fp16: 1 | warp size: 32 + ``` ### Prepare Data & Run From b05102fe8cfa9893851c6bf6efd15cdc20b6afa2 Mon Sep 17 00:00:00 2001 From: AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com> Date: Fri, 2 Feb 2024 08:39:48 +0000 Subject: [PATCH 07/94] Tidy ggml-sycl (#5261) * Tidy some code in ggml-sycl * Remove blank space * Remove std::printf comments --------- Co-authored-by: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> --- ggml-sycl.cpp | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index 4ee2eed38..ac75f8e16 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -1366,6 +1366,7 @@ namespace dpct } #else return q.memcpy(to_ptr, from_ptr, size, dep_events); + GGML_UNUSED(direction); #endif // DPCT_USM_LEVEL_NONE } @@ -1667,7 +1668,7 @@ namespace dpct using Ty = typename DataType::T2; Ty s_h; if (get_pointer_attribute(q, s) == pointer_access_attribute::device_only) - detail::dpct_memcpy(q, (void *)&s_h, (void *)s, sizeof(T), device_to_host) + detail::dpct_memcpy(q, (void *)&s_h, (const void *)s, sizeof(T), device_to_host) .wait(); else s_h = *reinterpret_cast(s); @@ -1691,6 +1692,20 @@ namespace dpct int ldb, const void *beta, void *c, int ldc) { #ifndef __INTEL_MKL__ + GGML_UNUSED(q); + GGML_UNUSED(a_trans); + GGML_UNUSED(b_trans); + GGML_UNUSED(m); + GGML_UNUSED(n); + GGML_UNUSED(k); + GGML_UNUSED(alpha); + GGML_UNUSED(a); + GGML_UNUSED(lda); + GGML_UNUSED(b); + GGML_UNUSED(ldb); + GGML_UNUSED(beta); + GGML_UNUSED(c); + GGML_UNUSED(ldc); throw std::runtime_error("The oneAPI Math Kernel Library (oneMKL) Interfaces " "Project does not support 
this API."); #else @@ -1830,7 +1845,7 @@ namespace dpct template T permute_sub_group_by_xor(sycl::sub_group g, T x, unsigned int mask, - int logical_sub_group_size = 32) + unsigned int logical_sub_group_size = 32) { unsigned int id = g.get_local_linear_id(); unsigned int start_index = @@ -2160,6 +2175,7 @@ namespace dpct } #else return q.memcpy(to_ptr, from_ptr, size, dep_events); + GGML_UNUSED(direction); #endif // DPCT_USM_LEVEL_NONE } @@ -3302,7 +3318,7 @@ void log_ggml_var_device(const char*name, float *src, size_t total_elements, boo std::ofstream logfile; logfile.open(filename); // printf("local buf element %d\n", total_elements); - for(int i=0; ibackend == GGML_BACKEND_GPU && device_id == g_main_device ? ne0 : row_diff; - const int compute_capability = g_device_caps[id].cc; #ifdef GGML_SYCL_F16 bool use_fp16 = true; // TODO(Yu) SYCL capability check #else @@ -12691,7 +12700,7 @@ static void ggml_sycl_set_peer_access(const int n_tokens) { continue; } - int can_access_peer; + // int can_access_peer; // SYCL_CHECK(syclDeviceCanAccessPeer(&can_access_peer, id, id_other)); // if (can_access_peer) { // if (enable_peer_access) { @@ -12716,7 +12725,6 @@ static void ggml_sycl_op_mul_mat(const ggml_tensor *src0, const int64_t ne01 = src0->ne[1]; const int64_t ne02 = src0->ne[2]; const int64_t ne03 = src0->ne[3]; - const int64_t nrows0 = ggml_nrows(src0); const int64_t ne10 = src1->ne[0]; const int64_t ne11 = src1->ne[1]; @@ -13812,13 +13820,6 @@ static void ggml_sycl_mul_mat_id(const ggml_tensor *src0, src1_row_extra.data_device[g_main_device_index] = src1_contiguous.get(); dst_row_extra.data_device[g_main_device_index] = dst_contiguous.get(); - const dpct::memcpy_direction src1_kind = - src1->backend == GGML_BACKEND_CPU ? dpct::host_to_device - : dpct::device_to_device; - const dpct::memcpy_direction dst_kind = dst->backend == GGML_BACKEND_CPU - ? 
dpct::device_to_host - : dpct::device_to_device; - for (int32_t row_id = 0; row_id < n_as; ++row_id) { const struct ggml_tensor * src0_row = dst->src[row_id + 2]; From 2d40085c26794e29c434480b9e06738e89e5686f Mon Sep 17 00:00:00 2001 From: Mirror Azure <54669636+MirrorAzure@users.noreply.github.com> Date: Fri, 2 Feb 2024 14:39:09 +0300 Subject: [PATCH 08/94] py : add check for '.attn.masked_bias' layers to GPT2model (#5281) --- convert-hf-to-gguf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py index 4ebab07b3..a6ffd128b 100755 --- a/convert-hf-to-gguf.py +++ b/convert-hf-to-gguf.py @@ -1138,7 +1138,7 @@ class GPT2Model(Model): for name, data_torch in self.get_tensors(): # we don't need these - if name.endswith((".attention.masked_bias", ".attention.bias", ".attention.rotary_emb.inv_freq", ".attn.bias")): + if name.endswith((".attention.masked_bias", ".attention.bias", ".attention.rotary_emb.inv_freq", ".attn.bias", ".attn.masked_bias")): continue if name.endswith((".c_attn.weight", ".c_proj.weight", ".c_fc.weight", ".c_proj.weight")): From e437b37fd0b2b97e6c6ff1045ec7f901faa6498a Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Fri, 2 Feb 2024 14:23:40 +0200 Subject: [PATCH 09/94] scripts : parse wtype in server-llm.sh (#5167) * scripts : parse wtype in server-llm.sh * scripts : fix check for wfile --- scripts/server-llm.sh | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/scripts/server-llm.sh b/scripts/server-llm.sh index 7bf0929bb..0b83cdbbc 100644 --- a/scripts/server-llm.sh +++ b/scripts/server-llm.sh @@ -141,6 +141,28 @@ for wt in "${wtypes[@]}"; do wfiles+=("") done +# map wtype input to index +if [[ ! 
-z "$wtype" ]]; then + iw=-1 + is=0 + for wt in "${wtypes[@]}"; do + # uppercase + uwt=$(echo "$wt" | tr '[:lower:]' '[:upper:]') + if [[ "$uwt" == "$wtype" ]]; then + iw=$is + break + fi + is=$((is+1)) + done + + if [[ $iw -eq -1 ]]; then + printf "[-] Invalid weight type: %s\n" "$wtype" + exit 1 + fi + + wtype="$iw" +fi + # sample repos repos=( "https://huggingface.co/TheBloke/Llama-2-7B-GGUF" @@ -252,8 +274,10 @@ for file in $model_files; do printf " %2d) %s %s\n" $iw "$have" "$file" done +wfile="${wfiles[$wtype]}" + # ask for weights type until provided and available -while [[ -z "$wtype" ]]; do +while [[ -z "$wfile" ]]; do printf "\n" read -p "[+] Select weight type: " wtype wfile="${wfiles[$wtype]}" From 191221178f51b6e81122c5bda0fd79620e547d07 Mon Sep 17 00:00:00 2001 From: kalomaze <66376113+kalomaze@users.noreply.github.com> Date: Fri, 2 Feb 2024 08:15:30 -0600 Subject: [PATCH 10/94] perplexity : fix KL divergence calculations on Windows (#5273) --- examples/perplexity/perplexity.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp index 8d2204969..4b08145cd 100644 --- a/examples/perplexity/perplexity.cpp +++ b/examples/perplexity/perplexity.cpp @@ -457,14 +457,14 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par std::ofstream logits_stream; if (!params.logits_file.empty()) { - logits_stream.open(params.logits_file.c_str()); + logits_stream.open(params.logits_file.c_str(), std::ios::binary); if (!logits_stream.is_open()) { fprintf(stderr, "%s: failed to open %s for writing\n", __func__, params.logits_file.c_str()); return {}; } fprintf(stderr, "%s: saving all logits to %s\n", __func__, params.logits_file.c_str()); logits_stream.write("_logits_", 8); - logits_stream.write((const char *)&n_ctx, sizeof(n_ctx)); + logits_stream.write(reinterpret_cast(&n_ctx), sizeof(n_ctx)); } auto tim1 = std::chrono::high_resolution_clock::now(); From 
a305dba8ff642e57f538f42010868fe0bc5262a1 Mon Sep 17 00:00:00 2001 From: AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com> Date: Sat, 3 Feb 2024 08:11:37 +0000 Subject: [PATCH 11/94] Fix im2col with 32fp (#5286) --- ggml-sycl.cpp | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index ac75f8e16..51445b5e7 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -8247,7 +8247,8 @@ static void clamp_f32(const float * x, float * dst, const float min, const float dst[i] = x[i] < min ? min : (x[i] > max ? max : x[i]); } -static void im2col_f32_f16(const float *x, sycl::half *dst, int offset_delta, +template +static void im2col_kernel(const float *x, T *dst, int offset_delta, int IW, int IH, int OW, int KW, int KH, int pelements, int CHW, int s0, int s1, int p0, int p1, int d0, int d1, @@ -11019,7 +11020,8 @@ static void soft_max_f32_sycl(const float *x, const float *y, float *dst, }); } -static void im2col_f32_f16_sycl(const float *x, sycl::half *dst, int IW, int IH, +template +static void im2col_sycl(const float *x, T *dst, int IW, int IH, int OW, int OH, int KW, int KH, int IC, int offset_delta, int s0, int s1, int p0, int p1, int d0, int d1, @@ -11036,7 +11038,7 @@ static void im2col_f32_f16_sycl(const float *x, sycl::half *dst, int IW, int IH, sycl::range<3>(1, 1, SYCL_IM2COL_BLOCK_SIZE), sycl::range<3>(1, 1, SYCL_IM2COL_BLOCK_SIZE)), [=](sycl::nd_item<3> item_ct1) { - im2col_f32_f16(x, dst, offset_delta, IW, IH, OW, KW, KH, + im2col_kernel(x, dst, offset_delta, IW, IH, OW, KW, KH, parallel_elements, (IC * KH * KW), s0, s1, p0, p1, d0, d1, item_ct1); }); @@ -12424,7 +12426,7 @@ inline void ggml_sycl_op_im2col(const ggml_tensor *src0, GGML_ASSERT(src0->type == GGML_TYPE_F16); GGML_ASSERT(src1->type == GGML_TYPE_F32); - GGML_ASSERT( dst->type == GGML_TYPE_F16); + GGML_ASSERT( dst->type == GGML_TYPE_F16 || dst->type == GGML_TYPE_F32); const int32_t s0 = ((const int32_t*)(dst->op_params))[0]; const 
int32_t s1 = ((const int32_t*)(dst->op_params))[1]; @@ -12447,8 +12449,11 @@ inline void ggml_sycl_op_im2col(const ggml_tensor *src0, const size_t delta_offset = src1->nb[is_2D ? 2 : 1] / 4; // nb is byte offset, src is type float32 - im2col_f32_f16_sycl(src1_dd, (sycl::half *)dst_dd, IW, IH, OW, OH, KW, KH, - IC, delta_offset, s0, s1, p0, p1, d0, d1, main_stream); + if (dst->type == GGML_TYPE_F16) { + im2col_sycl(src1_dd, (sycl::half *)dst_dd, IW, IH, OW, OH, KW, KH, IC, delta_offset, s0, s1, p0, p1, d0, d1, main_stream); + } else { + im2col_sycl(src1_dd, (float *)dst_dd, IW, IH, OW, OH, KW, KH, IC, delta_offset, s0, s1, p0, p1, d0, d1, main_stream); + } (void) src0; (void) src0_dd; From 6a66c5071a74a96c4f52cf1015a092acd18c3714 Mon Sep 17 00:00:00 2001 From: BADR Date: Sat, 3 Feb 2024 12:20:26 +0100 Subject: [PATCH 12/94] readme : add tenere in the ui tools list (#5284) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index af1f09fa0..4a9bdf314 100644 --- a/README.md +++ b/README.md @@ -143,6 +143,7 @@ as the main playground for developing new features for the [ggml](https://github - [psugihara/FreeChat](https://github.com/psugihara/FreeChat) - [ptsochantaris/emeltal](https://github.com/ptsochantaris/emeltal) - [iohub/collama](https://github.com/iohub/coLLaMA) +- [pythops/tenere](https://github.com/pythops/tenere) --- From 1ec3332ade60aeb1494ace2211cf1a966db6d770 Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Sat, 3 Feb 2024 06:22:06 -0500 Subject: [PATCH 13/94] YaRN : store rope scaling type as int32_t in memory (#5285) * YaRN : store rope scaling type as int32_t in memory * llama : store mapped names as const char * --- common/common.h | 3 +-- llama.cpp | 24 ++++++++++++------------ llama.h | 2 +- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/common/common.h b/common/common.h index 24a99d728..62de25d6a 100644 --- a/common/common.h +++ b/common/common.h @@ -75,8 +75,7 @@ struct gpt_params { float 
yarn_beta_fast = 32.0f; // YaRN low correction dim float yarn_beta_slow = 1.0f; // YaRN high correction dim int32_t yarn_orig_ctx = 0; // YaRN original context length - int8_t rope_scaling_type = LLAMA_ROPE_SCALING_UNSPECIFIED; // TODO: better to be int32_t for alignment - // pinging @cebtenzzre + int32_t rope_scaling_type = LLAMA_ROPE_SCALING_UNSPECIFIED; // // sampling parameters struct llama_sampling_params sparams; diff --git a/llama.cpp b/llama.cpp index 6bf7f9efb..4787a92fe 100644 --- a/llama.cpp +++ b/llama.cpp @@ -208,7 +208,7 @@ enum llm_arch { LLM_ARCH_UNKNOWN, }; -static std::map LLM_ARCH_NAMES = { +static std::map LLM_ARCH_NAMES = { { LLM_ARCH_LLAMA, "llama" }, { LLM_ARCH_FALCON, "falcon" }, { LLM_ARCH_GPT2, "gpt2" }, @@ -285,7 +285,7 @@ enum llm_kv { LLM_KV_TOKENIZER_RWKV, }; -static std::map LLM_KV_NAMES = { +static std::map LLM_KV_NAMES = { { LLM_KV_GENERAL_ARCHITECTURE, "general.architecture" }, { LLM_KV_GENERAL_QUANTIZATION_VERSION, "general.quantization_version" }, { LLM_KV_GENERAL_ALIGNMENT, "general.alignment" }, @@ -346,7 +346,7 @@ struct LLM_KV { llm_arch arch; std::string operator()(llm_kv kv) const { - return ::format(LLM_KV_NAMES[kv].c_str(), LLM_ARCH_NAMES[arch].c_str()); + return ::format(LLM_KV_NAMES[kv], LLM_ARCH_NAMES[arch]); } }; @@ -747,13 +747,13 @@ struct LLM_TN { // gguf helpers // -static std::map LLAMA_ROPE_SCALING_TYPES = { +static std::map LLAMA_ROPE_SCALING_TYPES = { { LLAMA_ROPE_SCALING_NONE, "none" }, { LLAMA_ROPE_SCALING_LINEAR, "linear" }, { LLAMA_ROPE_SCALING_YARN, "yarn" }, }; -static int8_t llama_rope_scaling_type_from_string(const std::string & name) { +static int32_t llama_rope_scaling_type_from_string(const std::string & name) { for (const auto & kv : LLAMA_ROPE_SCALING_TYPES) { if (kv.second == name) { return kv.first; @@ -1415,6 +1415,7 @@ static const size_t GiB = 1024*MiB; struct llama_hparams { bool vocab_only; + bool rope_finetuned; uint32_t n_vocab; uint32_t n_ctx_train; // context size the model was trained 
on uint32_t n_embd; @@ -1434,8 +1435,7 @@ struct llama_hparams { float rope_freq_base_train; float rope_freq_scale_train; uint32_t n_yarn_orig_ctx; - int8_t rope_scaling_type_train : 3; - bool rope_finetuned : 1; + int32_t rope_scaling_type_train; float f_clamp_kqv; float f_max_alibi_bias; @@ -2701,7 +2701,7 @@ struct llama_model_loader { // load LLaMA models // -static std::string llama_model_arch_name(llm_arch arch) { +static const char * llama_model_arch_name(llm_arch arch) { auto it = LLM_ARCH_NAMES.find(arch); if (it == LLM_ARCH_NAMES.end()) { return "unknown"; @@ -3310,11 +3310,11 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) { const auto & hparams = model.hparams; const auto & vocab = model.vocab; - const auto rope_scaling_type = LLAMA_ROPE_SCALING_TYPES.at(hparams.rope_scaling_type_train); + const char * rope_scaling_type = LLAMA_ROPE_SCALING_TYPES.at(hparams.rope_scaling_type_train); // hparams LLAMA_LOG_INFO("%s: format = %s\n", __func__, llama_file_version_name(ml.fver)); - LLAMA_LOG_INFO("%s: arch = %s\n", __func__, LLM_ARCH_NAMES.at(model.arch).c_str()); + LLAMA_LOG_INFO("%s: arch = %s\n", __func__, LLM_ARCH_NAMES.at(model.arch)); LLAMA_LOG_INFO("%s: vocab type = %s\n", __func__, llama_model_vocab_type_name(vocab.type)); LLAMA_LOG_INFO("%s: n_vocab = %u\n", __func__, hparams.n_vocab); LLAMA_LOG_INFO("%s: n_merges = %u\n", __func__, (int) vocab.bpe_ranks.size()); @@ -3336,7 +3336,7 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) { LLAMA_LOG_INFO("%s: n_ff = %u\n", __func__, hparams.n_ff); LLAMA_LOG_INFO("%s: n_expert = %u\n", __func__, hparams.n_expert); LLAMA_LOG_INFO("%s: n_expert_used = %u\n", __func__, hparams.n_expert_used); - LLAMA_LOG_INFO("%s: rope scaling = %s\n", __func__, rope_scaling_type.c_str()); + LLAMA_LOG_INFO("%s: rope scaling = %s\n", __func__, rope_scaling_type); LLAMA_LOG_INFO("%s: freq_base_train = %.1f\n", __func__, hparams.rope_freq_base_train); 
LLAMA_LOG_INFO("%s: freq_scale_train = %g\n", __func__, hparams.rope_freq_scale_train); LLAMA_LOG_INFO("%s: n_yarn_orig_ctx = %u\n", __func__, hparams.n_yarn_orig_ctx); @@ -10735,7 +10735,7 @@ int32_t llama_model_meta_val_str_by_index(const struct llama_model * model, int3 int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) { return snprintf(buf, buf_size, "%s %s %s", - llama_model_arch_name(model->arch).c_str(), + llama_model_arch_name(model->arch), llama_model_type_name(model->type), llama_model_ftype_name(model->ftype).c_str()); } diff --git a/llama.h b/llama.h index 9a60e9bfb..cec4158bc 100644 --- a/llama.h +++ b/llama.h @@ -213,7 +213,7 @@ extern "C" { uint32_t n_batch; // prompt processing maximum batch size uint32_t n_threads; // number of threads to use for generation uint32_t n_threads_batch; // number of threads to use for batch processing - int8_t rope_scaling_type; // RoPE scaling type, from `enum llama_rope_scaling_type` + int32_t rope_scaling_type; // RoPE scaling type, from `enum llama_rope_scaling_type` // ref: https://github.com/ggerganov/llama.cpp/pull/2054 float rope_freq_base; // RoPE base frequency, 0 = from model From 52bb63c7082c859c3f1dfc527227e6a95b299c7c Mon Sep 17 00:00:00 2001 From: Michael Klimenko Date: Sat, 3 Feb 2024 12:23:37 +0100 Subject: [PATCH 14/94] refactor : switch to emplace_back to avoid extra object (#5291) --- common/common.cpp | 8 ++--- examples/llama-bench/llama-bench.cpp | 34 +++++++++++----------- examples/main/main.cpp | 4 +-- examples/perplexity/perplexity.cpp | 8 ++--- examples/quantize-stats/quantize-stats.cpp | 4 +-- examples/quantize/quantize.cpp | 4 +-- examples/server/server.cpp | 8 ++--- tests/test-llama-grammar.cpp | 2 +- 8 files changed, 36 insertions(+), 36 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index ce739b15c..3302caa20 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -515,7 +515,7 @@ bool gpt_params_parse_ex(int argc, char ** argv, 
gpt_params & params) { invalid_param = true; break; } - params.lora_adapter.push_back(std::make_tuple(argv[i], 1.0f)); + params.lora_adapter.emplace_back(argv[i], 1.0f); params.use_mmap = false; } else if (arg == "--lora-scaled") { if (++i >= argc) { @@ -527,7 +527,7 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { invalid_param = true; break; } - params.lora_adapter.push_back(std::make_tuple(lora_adapter, std::stof(argv[i]))); + params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i])); params.use_mmap = false; } else if (arg == "--lora-base") { if (++i >= argc) { @@ -664,7 +664,7 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { invalid_param = true; break; } - params.antiprompt.push_back(argv[i]); + params.antiprompt.emplace_back(argv[i]); } else if (arg == "-ld" || arg == "--logdir") { if (++i >= argc) { invalid_param = true; @@ -880,7 +880,7 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { } if (!params.kv_overrides.empty()) { - params.kv_overrides.emplace_back(llama_model_kv_override()); + params.kv_overrides.emplace_back(); params.kv_overrides.back().key[0] = 0; } diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index e36c061a2..ddb0ba064 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -948,46 +948,46 @@ struct markdown_printer : public printer { void print_header(const cmd_params & params) override { // select fields to print - fields.push_back("model"); - fields.push_back("size"); - fields.push_back("params"); - fields.push_back("backend"); + fields.emplace_back("model"); + fields.emplace_back("size"); + fields.emplace_back("params"); + fields.emplace_back("backend"); bool is_cpu_backend = test::get_backend() == "CPU" || test::get_backend() == "BLAS"; if (!is_cpu_backend) { - fields.push_back("n_gpu_layers"); + fields.emplace_back("n_gpu_layers"); } if (params.n_threads.size() > 1 || 
params.n_threads != cmd_params_defaults.n_threads || is_cpu_backend) { - fields.push_back("n_threads"); + fields.emplace_back("n_threads"); } if (params.n_batch.size() > 1 || params.n_batch != cmd_params_defaults.n_batch) { - fields.push_back("n_batch"); + fields.emplace_back("n_batch"); } if (params.type_k.size() > 1 || params.type_k != cmd_params_defaults.type_k) { - fields.push_back("type_k"); + fields.emplace_back("type_k"); } if (params.type_v.size() > 1 || params.type_v != cmd_params_defaults.type_v) { - fields.push_back("type_v"); + fields.emplace_back("type_v"); } if (params.main_gpu.size() > 1 || params.main_gpu != cmd_params_defaults.main_gpu) { - fields.push_back("main_gpu"); + fields.emplace_back("main_gpu"); } if (params.split_mode.size() > 1 || params.split_mode != cmd_params_defaults.split_mode) { - fields.push_back("split_mode"); + fields.emplace_back("split_mode"); } if (params.mul_mat_q.size() > 1 || params.mul_mat_q != cmd_params_defaults.mul_mat_q) { - fields.push_back("mul_mat_q"); + fields.emplace_back("mul_mat_q"); } if (params.no_kv_offload.size() > 1 || params.no_kv_offload != cmd_params_defaults.no_kv_offload) { - fields.push_back("no_kv_offload"); + fields.emplace_back("no_kv_offload"); } if (params.tensor_split.size() > 1 || params.tensor_split != cmd_params_defaults.tensor_split) { - fields.push_back("tensor_split"); + fields.emplace_back("tensor_split"); } if (params.use_mmap.size() > 1 || params.use_mmap != cmd_params_defaults.use_mmap) { - fields.push_back("use_mmap"); + fields.emplace_back("use_mmap"); } - fields.push_back("test"); - fields.push_back("t/s"); + fields.emplace_back("test"); + fields.emplace_back("t/s"); fprintf(fout, "|"); for (const auto & field : fields) { diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 1c6138d23..0ed4d79f9 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -352,12 +352,12 @@ int main(int argc, char ** argv) { // in instruct mode, we inject a prefix and a suffix 
to each input by the user if (params.instruct) { params.interactive_first = true; - params.antiprompt.push_back("### Instruction:\n\n"); + params.antiprompt.emplace_back("### Instruction:\n\n"); } // similar for chatml mode else if (params.chatml) { params.interactive_first = true; - params.antiprompt.push_back("<|im_start|>user\n"); + params.antiprompt.emplace_back("<|im_start|>user\n"); } // enable interactive mode if interactive start is specified diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp index 4b08145cd..b2c131d4c 100644 --- a/examples/perplexity/perplexity.cpp +++ b/examples/perplexity/perplexity.cpp @@ -881,7 +881,7 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) { size_t li = hs_cur.common_prefix; for (int s = 0; s < 4; ++s) { for (size_t j = hs_cur.common_prefix; j < hs_cur.seq_tokens[s].size() - 1; j++) { - eval_pairs.push_back(std::make_pair(hs_cur.i_batch + li++, hs_cur.seq_tokens[s][j + 1])); + eval_pairs.emplace_back(hs_cur.i_batch + li++, hs_cur.seq_tokens[s][j + 1]); } ++li; } @@ -1159,13 +1159,13 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) { const int last_1st = task.seq_tokens[0].size() - n_base1 > 1 ? 1 : 0; size_t li = n_base1 - 1; for (size_t j = n_base1-1; j < task.seq_tokens[0].size()-1-last_1st; ++j) { - eval_pairs.push_back(std::make_pair(task.i_batch + li++, task.seq_tokens[0][j+1])); + eval_pairs.emplace_back(task.i_batch + li++, task.seq_tokens[0][j+1]); } const auto& n_base2 = skip_choice ? task.n_base2 : task.common_prefix; const int last_2nd = task.seq_tokens[1].size() - n_base2 > 1 ? 
1 : 0; li = task.seq_tokens[0].size() - task.common_prefix + n_base2 - 1; for (size_t j = n_base2-1; j < task.seq_tokens[1].size()-1-last_2nd; ++j) { - eval_pairs.push_back(std::make_pair(task.i_batch + li++, task.seq_tokens[1][j+1])); + eval_pairs.emplace_back(task.i_batch + li++, task.seq_tokens[1][j+1]); } } compute_logprobs(batch_logits.data(), n_vocab, workers, eval_pairs, eval_results); @@ -1524,7 +1524,7 @@ static void multiple_choice_score(llama_context * ctx, const gpt_params & params size_t li = cur_task.common_prefix; for (int s = 0; s < int(cur_task.seq_tokens.size()); ++s) { for (size_t j = cur_task.common_prefix; j < cur_task.seq_tokens[s].size() - 1; j++) { - eval_pairs.push_back(std::make_pair(cur_task.i_batch + li++, cur_task.seq_tokens[s][j + 1])); + eval_pairs.emplace_back(cur_task.i_batch + li++, cur_task.seq_tokens[s][j + 1]); } ++li; } diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp index 6d5f213dc..1d05f1391 100644 --- a/examples/quantize-stats/quantize-stats.cpp +++ b/examples/quantize-stats/quantize-stats.cpp @@ -257,13 +257,13 @@ int main(int argc, char ** argv) { invalid_param = true; break; } - params.include_layers.push_back(argv[i]); + params.include_layers.emplace_back(argv[i]); } else if (arg == "-L" || arg == "--exclude-layer") { if (++i >= argc) { invalid_param = true; break; } - params.exclude_layers.push_back(argv[i]); + params.exclude_layers.emplace_back(argv[i]); } else if (arg == "-t" || arg == "--type") { if (++i >= argc) { invalid_param = true; diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp index a9673f0d4..85f403ffc 100644 --- a/examples/quantize/quantize.cpp +++ b/examples/quantize/quantize.cpp @@ -208,13 +208,13 @@ int main(int argc, char ** argv) { } } else if (strcmp(argv[arg_idx], "--include-weights") == 0) { if (arg_idx < argc-1) { - included_weights.push_back(argv[++arg_idx]); + included_weights.emplace_back(argv[++arg_idx]); } else { 
usage(argv[0]); } } else if (strcmp(argv[arg_idx], "--exclude-weights") == 0) { if (arg_idx < argc-1) { - excluded_weights.push_back(argv[++arg_idx]); + excluded_weights.emplace_back(argv[++arg_idx]); } else { usage(argv[0]); } diff --git a/examples/server/server.cpp b/examples/server/server.cpp index ea77125ea..a9f8cb369 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1884,7 +1884,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, invalid_param = true; break; } - sparams.api_keys.push_back(argv[i]); + sparams.api_keys.emplace_back(argv[i]); } else if (arg == "--api-key-file") { @@ -2160,7 +2160,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, invalid_param = true; break; } - params.lora_adapter.push_back(std::make_tuple(argv[i], 1.0f)); + params.lora_adapter.emplace_back(argv[i], 1.0f); params.use_mmap = false; } else if (arg == "--lora-scaled") @@ -2176,7 +2176,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, invalid_param = true; break; } - params.lora_adapter.push_back(std::make_tuple(lora_adapter, std::stof(argv[i]))); + params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i])); params.use_mmap = false; } else if (arg == "--lora-base") @@ -2318,7 +2318,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, } } if (!params.kv_overrides.empty()) { - params.kv_overrides.emplace_back(llama_model_kv_override()); + params.kv_overrides.emplace_back(); params.kv_overrides.back().key[0] = 0; } diff --git a/tests/test-llama-grammar.cpp b/tests/test-llama-grammar.cpp index 78fc41117..16ebe753f 100644 --- a/tests/test-llama-grammar.cpp +++ b/tests/test-llama-grammar.cpp @@ -105,7 +105,7 @@ int main() for (auto rule : expected_rules) { - parsed_grammar.rules.push_back({}); + parsed_grammar.rules.emplace_back(); for (auto element : rule) { parsed_grammar.rules.back().push_back(element); From 
e920ed393d989ed35625ddaf182ebb52cda07fcd Mon Sep 17 00:00:00 2001 From: 0cc4m Date: Sat, 3 Feb 2024 18:15:00 +0100 Subject: [PATCH 15/94] Vulkan Intel Fixes, Optimizations and Debugging Flags (#5301) * Fix Vulkan on Intel ARC Optimize matmul for Intel ARC Add Vulkan dequant test * Add Vulkan debug and validate flags to Make and CMakeLists.txt * Enable asynchronous transfers in Vulkan backend * Fix flake8 * Disable Vulkan async backend functions for now * Also add Vulkan run tests command to Makefile and CMakeLists.txt --- CMakeLists.txt | 20 + Makefile | 12 + ggml-vulkan-shaders.hpp | 10922 +++------------------------------- ggml-vulkan.cpp | 420 +- ggml_vk_generate_shaders.py | 213 +- 5 files changed, 1257 insertions(+), 10330 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1ee455b3a..c156c4824 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -100,6 +100,10 @@ option(LLAMA_HIPBLAS "llama: use hipBLAS" option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF) option(LLAMA_CLBLAST "llama: use CLBlast" OFF) option(LLAMA_VULKAN "llama: use Vulkan" OFF) +option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF) +option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF) +option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF) +option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF) option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT}) option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF) option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF) @@ -431,6 +435,22 @@ if (LLAMA_VULKAN) add_compile_definitions(GGML_USE_VULKAN) + if (LLAMA_VULKAN_CHECK_RESULTS) + target_compile_definitions(ggml-vulkan PRIVATE GGML_VULKAN_CHECK_RESULTS) + endif() + + if (LLAMA_VULKAN_DEBUG) + target_compile_definitions(ggml-vulkan PRIVATE GGML_VULKAN_DEBUG) + endif() + + if (LLAMA_VULKAN_VALIDATE) + target_compile_definitions(ggml-vulkan PRIVATE GGML_VULKAN_VALIDATE) + endif() + + if 
(LLAMA_VULKAN_RUN_TESTS) + target_compile_definitions(ggml-vulkan PRIVATE GGML_VULKAN_RUN_TESTS) + endif() + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ggml-vulkan) else() message(WARNING "Vulkan not found") diff --git a/Makefile b/Makefile index bf9e085de..a55d15888 100644 --- a/Makefile +++ b/Makefile @@ -457,6 +457,18 @@ ifdef LLAMA_VULKAN_CHECK_RESULTS MK_CPPFLAGS += -DGGML_VULKAN_CHECK_RESULTS endif +ifdef LLAMA_VULKAN_DEBUG + MK_CPPFLAGS += -DGGML_VULKAN_DEBUG +endif + +ifdef LLAMA_VULKAN_VALIDATE + MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE +endif + +ifdef LLAMA_VULKAN_RUN_TESTS + MK_CPPFLAGS += -DGGML_VULKAN_RUN_TESTS +endif + ggml-vulkan.o: ggml-vulkan.cpp ggml-vulkan.h $(CXX) $(CXXFLAGS) -c $< -o $@ endif # LLAMA_VULKAN diff --git a/ggml-vulkan-shaders.hpp b/ggml-vulkan-shaders.hpp index 195410c02..e5e7a8414 100644 --- a/ggml-vulkan-shaders.hpp +++ b/ggml-vulkan-shaders.hpp @@ -890,156 +890,6 @@ const uint64_t cpy_f32_f32_len = 2472; unsigned char dequant_f16_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0x81,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, -0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00,0x0b,0x00,0x06,0x00, -0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c,0x2e,0x73,0x74,0x64, -0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00,0x0e,0x00,0x03,0x00, -0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x0f,0x00,0x09,0x00, -0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x6d,0x61,0x69,0x6e, -0x00,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x4f,0x00,0x00,0x00,0x5d,0x00,0x00,0x00,0x10,0x00,0x06,0x00, -0x04,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x00,0x01,0x00,0x00, -0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x0c,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x14,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x14,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00, 
-0x04,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x14,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x08,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x14,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x14,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x4c,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x48,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x4d,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x4d,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x4f,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x4f,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x5a,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x48,0x00,0x04,0x00,0x5b,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x5b,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x5b,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x5d,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x5d,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x7e,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00, -0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x0a,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x0b,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x0a,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x0b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x0d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, 
-0x0e,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x1e,0x00,0x06,0x00,0x14,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x15,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x14,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x15,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x18,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x14,0x00,0x02,0x00,0x23,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x36,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x16,0x00,0x03,0x00, -0x48,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, -0x4c,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0x4d,0x00,0x00,0x00,0x4c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x4e,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x4d,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0x4f,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x52,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, -0x5a,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0x5b,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x5c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x5b,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x5c,0x00,0x00,0x00,0x5d,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x5f,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x7d,0x00,0x00,0x00, -0x00,0x01,0x00,0x00,0x2c,0x00,0x06,0x00,0x0a,0x00,0x00,0x00, -0x7e,0x00,0x00,0x00,0x7d,0x00,0x00,0x00,0x77,0x00,0x00,0x00, -0x77,0x00,0x00,0x00,0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, 
-0x7f,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00, -0x0d,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x80,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x0e,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x0d,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x18,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x1a,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x87,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x8b,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x1b,0x00,0x00,0x00, -0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0xaf,0x00,0x05,0x00, -0x23,0x00,0x00,0x00,0x28,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x1a,0x00,0x00,0x00,0xa8,0x00,0x04,0x00,0x23,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0x28,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, -0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00, -0x29,0x00,0x00,0x00,0x2a,0x00,0x00,0x00,0x2b,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x2a,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x18,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x2f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xaf,0x00,0x05,0x00, -0x23,0x00,0x00,0x00,0x30,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x2f,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x2b,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x2b,0x00,0x00,0x00,0xf5,0x00,0x07,0x00, -0x23,0x00,0x00,0x00,0x31,0x00,0x00,0x00,0x28,0x00,0x00,0x00, -0x80,0x00,0x00,0x00,0x30,0x00,0x00,0x00,0x2a,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0x33,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfa,0x00,0x04,0x00,0x31,0x00,0x00,0x00,0x32,0x00,0x00,0x00, -0x33,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x32,0x00,0x00,0x00, 
-0xf9,0x00,0x02,0x00,0x7f,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x33,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x18,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x36,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x38,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x39,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3d,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x39,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x3d,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x52,0x00,0x00,0x00, -0x53,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, -0x3f,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x48,0x00,0x00,0x00, -0x54,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x56,0x00,0x00,0x00,0x3f,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x52,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, -0x56,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x48,0x00,0x00,0x00, -0x58,0x00,0x00,0x00,0x57,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x18,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x5f,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x61,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x62,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x61,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x65,0x00,0x00,0x00,0x62,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x52,0x00,0x00,0x00,0x6c,0x00,0x00,0x00, -0x5d,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x65,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0x6c,0x00,0x00,0x00,0x54,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x76,0x00,0x00,0x00, -0x65,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x52,0x00,0x00,0x00,0x7a,0x00,0x00,0x00,0x5d,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x76,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0x7a,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, 
-0x7f,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x7f,0x00,0x00,0x00, -0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, -}; -const uint64_t dequant_f16_len = 1748; - -unsigned char dequant_f16_fp32_data[] = { -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, 0x86,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, 0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, @@ -1192,344 +1042,10 @@ unsigned char dequant_f16_fp32_data[] = { 0xf8,0x00,0x02,0x00,0x84,0x00,0x00,0x00,0xfd,0x00,0x01,0x00, 0x38,0x00,0x01,0x00, }; -const uint64_t dequant_f16_fp32_len = 1816; +const uint64_t dequant_f16_len = 1816; unsigned char dequant_q2_K_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0x0c,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, -0x11,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x51,0x11,0x00,0x00,0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00, -0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, -0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, -0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x0f,0x00,0x0a,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x25,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x63,0x00,0x00,0x00, -0x7b,0x00,0x00,0x00,0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x17,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x23,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00, 
-0x23,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x23,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x33,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x5a,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x5c,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x5f,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x5f,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x5f,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x50,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x60,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x61,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x61,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x61,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x63,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x63,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x78,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x79,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x79,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x79,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x7b,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x7b,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0xfc,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00, -0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, 
-0x06,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x00,0x01,0x00,0x00,0x14,0x00,0x02,0x00,0x11,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x15,0x00,0x00,0x00, -0x14,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x16,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x15,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x16,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x19,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x00,0x01,0x00,0x00,0x1e,0x00,0x06,0x00,0x23,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x24,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x24,0x00,0x00,0x00,0x25,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x26,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x16,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x39,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x45,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x4b,0x00,0x00,0x00, -0x80,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x56,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0x59,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x1c,0x00,0x04,0x00,0x5a,0x00,0x00,0x00,0x56,0x00,0x00,0x00, -0x59,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0x5b,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x1c,0x00,0x04,0x00, 
-0x5c,0x00,0x00,0x00,0x56,0x00,0x00,0x00,0x5b,0x00,0x00,0x00, -0x16,0x00,0x03,0x00,0x5d,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x17,0x00,0x04,0x00,0x5e,0x00,0x00,0x00,0x5d,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x1e,0x00,0x05,0x00,0x5f,0x00,0x00,0x00, -0x5a,0x00,0x00,0x00,0x5c,0x00,0x00,0x00,0x5e,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x60,0x00,0x00,0x00,0x5f,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x61,0x00,0x00,0x00,0x60,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x62,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x61,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x62,0x00,0x00,0x00, -0x63,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x69,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x56,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x6f,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x70,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x5d,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x78,0x00,0x00,0x00,0x5d,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x79,0x00,0x00,0x00,0x78,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x7a,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x79,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x7a,0x00,0x00,0x00, -0x7b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x86,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x8c,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x97,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xbc,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xdb,0x00,0x00,0x00, -0x60,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xe0,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x2c,0x00,0x06,0x00, -0x15,0x00,0x00,0x00,0xfc,0x00,0x00,0x00,0x5b,0x00,0x00,0x00, -0x75,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0x2a,0x00,0x03,0x00, -0x11,0x00,0x00,0x00,0xff,0x00,0x00,0x00,0x29,0x00,0x03,0x00, -0x11,0x00,0x00,0x00,0x02,0x01,0x00,0x00,0x36,0x00,0x05,0x00, 
-0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0xfd,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfb,0x00,0x03,0x00,0x18,0x00,0x00,0x00,0xfe,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0xfe,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0x0a,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x0a,0x00,0x00,0x00, -0xf5,0x00,0x07,0x00,0x06,0x00,0x00,0x00,0x05,0x01,0x00,0x00, -0x09,0x00,0x00,0x00,0xfe,0x00,0x00,0x00,0xfb,0x00,0x00,0x00, -0x0d,0x00,0x00,0x00,0xb1,0x00,0x05,0x00,0x11,0x00,0x00,0x00, -0x12,0x00,0x00,0x00,0x05,0x01,0x00,0x00,0x10,0x00,0x00,0x00, -0xf6,0x00,0x04,0x00,0x0c,0x00,0x00,0x00,0x0d,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x12,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x0b,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x19,0x00,0x00,0x00, -0x1a,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x1b,0x00,0x00,0x00, -0x1a,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x14,0x00,0x00,0x00, -0x1d,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, -0x05,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x14,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x1d,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x26,0x00,0x00,0x00, -0x27,0x00,0x00,0x00,0x25,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x28,0x00,0x00,0x00, -0x27,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x26,0x00,0x00,0x00, -0x2a,0x00,0x00,0x00,0x25,0x00,0x00,0x00,0x29,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x2b,0x00,0x00,0x00, -0x2a,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x2c,0x00,0x00,0x00,0x28,0x00,0x00,0x00,0x2b,0x00,0x00,0x00, -0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, -0x2c,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0xaf,0x00,0x05,0x00, 
-0x11,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x30,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x2e,0x00,0x00,0x00, -0x2f,0x00,0x00,0x00,0x30,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x2f,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x0c,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x30,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x19,0x00,0x00,0x00,0x34,0x00,0x00,0x00,0x33,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0x35,0x00,0x00,0x00,0x34,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x36,0x00,0x00,0x00,0x35,0x00,0x00,0x00, -0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x36,0x00,0x00,0x00,0x39,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3e,0x00,0x00,0x00,0x39,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x82,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x3f,0x00,0x00,0x00,0x36,0x00,0x00,0x00,0x3e,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x43,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x87,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x46,0x00,0x00,0x00,0x3f,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x47,0x00,0x00,0x00,0x43,0x00,0x00,0x00,0x46,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x4d,0x00,0x00,0x00,0x4b,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0x4d,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x50,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x69,0x00,0x00,0x00,0x6a,0x00,0x00,0x00,0x63,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x29,0x00,0x00,0x00, -0x36,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x56,0x00,0x00,0x00, -0x6b,0x00,0x00,0x00,0x6a,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x70,0x00,0x00,0x00,0x71,0x00,0x00,0x00,0x63,0x00,0x00,0x00, 
-0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x6f,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x5d,0x00,0x00,0x00, -0x72,0x00,0x00,0x00,0x71,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x70,0x00,0x00,0x00,0x76,0x00,0x00,0x00,0x63,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x6f,0x00,0x00,0x00, -0x75,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x5d,0x00,0x00,0x00, -0x77,0x00,0x00,0x00,0x76,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x69,0x00,0x00,0x00,0x82,0x00,0x00,0x00,0x63,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x47,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x56,0x00,0x00,0x00, -0x83,0x00,0x00,0x00,0x82,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0x84,0x00,0x00,0x00,0x83,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x85,0x00,0x00,0x00, -0x84,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x87,0x00,0x00,0x00,0x85,0x00,0x00,0x00,0x86,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x56,0x00,0x00,0x00,0x89,0x00,0x00,0x00, -0x6b,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0x8a,0x00,0x00,0x00,0x89,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x8b,0x00,0x00,0x00, -0x8a,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x8d,0x00,0x00,0x00,0x8b,0x00,0x00,0x00,0x8c,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x8e,0x00,0x00,0x00, -0x87,0x00,0x00,0x00,0x8d,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x5d,0x00,0x00,0x00,0x8f,0x00,0x00,0x00,0x8e,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x56,0x00,0x00,0x00,0x96,0x00,0x00,0x00, -0x82,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x56,0x00,0x00,0x00, -0x98,0x00,0x00,0x00,0x96,0x00,0x00,0x00,0x97,0x00,0x00,0x00, -0x70,0x00,0x04,0x00,0x5d,0x00,0x00,0x00,0x99,0x00,0x00,0x00, -0x98,0x00,0x00,0x00,0x85,0x00,0x05,0x00,0x5d,0x00,0x00,0x00, -0x9a,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x99,0x00,0x00,0x00, -0x7f,0x00,0x04,0x00,0x5d,0x00,0x00,0x00,0x08,0x01,0x00,0x00, -0x9a,0x00,0x00,0x00,0x0c,0x00,0x08,0x00,0x5d,0x00,0x00,0x00, 
-0x9b,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x32,0x00,0x00,0x00, -0x72,0x00,0x00,0x00,0x8f,0x00,0x00,0x00,0x08,0x01,0x00,0x00, -0x41,0x00,0x06,0x00,0x70,0x00,0x00,0x00,0x9c,0x00,0x00,0x00, -0x7b,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x50,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0x9c,0x00,0x00,0x00,0x9b,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x9e,0x00,0x00,0x00, -0x50,0x00,0x00,0x00,0x39,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xa2,0x00,0x00,0x00,0x47,0x00,0x00,0x00, -0x6f,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x69,0x00,0x00,0x00, -0xa3,0x00,0x00,0x00,0x63,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0xa2,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x56,0x00,0x00,0x00,0xa4,0x00,0x00,0x00, -0xa3,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0xa5,0x00,0x00,0x00,0xa4,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xa6,0x00,0x00,0x00,0xa5,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xa7,0x00,0x00,0x00, -0xa6,0x00,0x00,0x00,0x86,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x56,0x00,0x00,0x00,0xa9,0x00,0x00,0x00,0x6b,0x00,0x00,0x00, -0x6f,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0xaa,0x00,0x00,0x00,0xa9,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xab,0x00,0x00,0x00,0xaa,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xac,0x00,0x00,0x00, -0xab,0x00,0x00,0x00,0x8c,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xad,0x00,0x00,0x00,0xa7,0x00,0x00,0x00, -0xac,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x5d,0x00,0x00,0x00, -0xae,0x00,0x00,0x00,0xad,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x56,0x00,0x00,0x00,0xb5,0x00,0x00,0x00,0xa3,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x56,0x00,0x00,0x00,0xb6,0x00,0x00,0x00, -0xb5,0x00,0x00,0x00,0x97,0x00,0x00,0x00,0x70,0x00,0x04,0x00, -0x5d,0x00,0x00,0x00,0xb7,0x00,0x00,0x00,0xb6,0x00,0x00,0x00, -0x85,0x00,0x05,0x00,0x5d,0x00,0x00,0x00,0xb8,0x00,0x00,0x00, -0x77,0x00,0x00,0x00,0xb7,0x00,0x00,0x00,0x7f,0x00,0x04,0x00, 
-0x5d,0x00,0x00,0x00,0x09,0x01,0x00,0x00,0xb8,0x00,0x00,0x00, -0x0c,0x00,0x08,0x00,0x5d,0x00,0x00,0x00,0xb9,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x32,0x00,0x00,0x00,0x72,0x00,0x00,0x00, -0xae,0x00,0x00,0x00,0x09,0x01,0x00,0x00,0x41,0x00,0x06,0x00, -0x70,0x00,0x00,0x00,0xba,0x00,0x00,0x00,0x7b,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x9e,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0xba,0x00,0x00,0x00,0xb9,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xbd,0x00,0x00,0x00,0x50,0x00,0x00,0x00, -0xbc,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xc1,0x00,0x00,0x00,0x47,0x00,0x00,0x00,0x97,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x69,0x00,0x00,0x00,0xc2,0x00,0x00,0x00, -0x63,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0xc1,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x56,0x00,0x00,0x00,0xc3,0x00,0x00,0x00,0xc2,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0xc4,0x00,0x00,0x00, -0xc3,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xc5,0x00,0x00,0x00,0xc4,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xc6,0x00,0x00,0x00,0xc5,0x00,0x00,0x00, -0x86,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x56,0x00,0x00,0x00, -0xc8,0x00,0x00,0x00,0x6b,0x00,0x00,0x00,0x97,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0xc9,0x00,0x00,0x00, -0xc8,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xca,0x00,0x00,0x00,0xc9,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xcb,0x00,0x00,0x00,0xca,0x00,0x00,0x00, -0x8c,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xcc,0x00,0x00,0x00,0xc6,0x00,0x00,0x00,0xcb,0x00,0x00,0x00, -0x6f,0x00,0x04,0x00,0x5d,0x00,0x00,0x00,0xcd,0x00,0x00,0x00, -0xcc,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x56,0x00,0x00,0x00, -0xd4,0x00,0x00,0x00,0xc2,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x56,0x00,0x00,0x00,0xd5,0x00,0x00,0x00,0xd4,0x00,0x00,0x00, -0x97,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x5d,0x00,0x00,0x00, -0xd6,0x00,0x00,0x00,0xd5,0x00,0x00,0x00,0x85,0x00,0x05,0x00, 
-0x5d,0x00,0x00,0x00,0xd7,0x00,0x00,0x00,0x77,0x00,0x00,0x00, -0xd6,0x00,0x00,0x00,0x7f,0x00,0x04,0x00,0x5d,0x00,0x00,0x00, -0x0a,0x01,0x00,0x00,0xd7,0x00,0x00,0x00,0x0c,0x00,0x08,0x00, -0x5d,0x00,0x00,0x00,0xd8,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x32,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0xcd,0x00,0x00,0x00, -0x0a,0x01,0x00,0x00,0x41,0x00,0x06,0x00,0x70,0x00,0x00,0x00, -0xd9,0x00,0x00,0x00,0x7b,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0xbd,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0xd9,0x00,0x00,0x00, -0xd8,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xdc,0x00,0x00,0x00,0x50,0x00,0x00,0x00,0xdb,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xe1,0x00,0x00,0x00, -0x47,0x00,0x00,0x00,0xe0,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x69,0x00,0x00,0x00,0xe2,0x00,0x00,0x00,0x63,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0xe1,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x56,0x00,0x00,0x00, -0xe3,0x00,0x00,0x00,0xe2,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0xe4,0x00,0x00,0x00,0xe3,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xe5,0x00,0x00,0x00, -0xe4,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xe6,0x00,0x00,0x00,0xe5,0x00,0x00,0x00,0x86,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x56,0x00,0x00,0x00,0xe8,0x00,0x00,0x00, -0x6b,0x00,0x00,0x00,0xe0,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0xe9,0x00,0x00,0x00,0xe8,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xea,0x00,0x00,0x00, -0xe9,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xeb,0x00,0x00,0x00,0xea,0x00,0x00,0x00,0x8c,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xec,0x00,0x00,0x00, -0xe6,0x00,0x00,0x00,0xeb,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x5d,0x00,0x00,0x00,0xed,0x00,0x00,0x00,0xec,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x56,0x00,0x00,0x00,0xf4,0x00,0x00,0x00, -0xe2,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x56,0x00,0x00,0x00, -0xf5,0x00,0x00,0x00,0xf4,0x00,0x00,0x00,0x97,0x00,0x00,0x00, 
-0x70,0x00,0x04,0x00,0x5d,0x00,0x00,0x00,0xf6,0x00,0x00,0x00, -0xf5,0x00,0x00,0x00,0x85,0x00,0x05,0x00,0x5d,0x00,0x00,0x00, -0xf7,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0xf6,0x00,0x00,0x00, -0x7f,0x00,0x04,0x00,0x5d,0x00,0x00,0x00,0x0b,0x01,0x00,0x00, -0xf7,0x00,0x00,0x00,0x0c,0x00,0x08,0x00,0x5d,0x00,0x00,0x00, -0xf8,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x32,0x00,0x00,0x00, -0x72,0x00,0x00,0x00,0xed,0x00,0x00,0x00,0x0b,0x01,0x00,0x00, -0x41,0x00,0x06,0x00,0x70,0x00,0x00,0x00,0xf9,0x00,0x00,0x00, -0x7b,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0xdc,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0xf9,0x00,0x00,0x00,0xf8,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0x0d,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x0d,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xfb,0x00,0x00,0x00,0x05,0x01,0x00,0x00,0x29,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0x0a,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x0c,0x00,0x00,0x00,0xf5,0x00,0x07,0x00,0x11,0x00,0x00,0x00, -0x06,0x01,0x00,0x00,0xff,0x00,0x00,0x00,0x0a,0x00,0x00,0x00, -0x02,0x01,0x00,0x00,0x2f,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, -0x03,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00, -0x06,0x01,0x00,0x00,0xfd,0x00,0x00,0x00,0x03,0x01,0x00,0x00, -0xf8,0x00,0x02,0x00,0x03,0x01,0x00,0x00,0xf9,0x00,0x02,0x00, -0xfd,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0xfd,0x00,0x00,0x00, -0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, -}; -const uint64_t dequant_q2_K_len = 3956; - -unsigned char dequant_q2_K_fp32_data[] = { -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, 0x13,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x27,0x00,0x00,0x00, 0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00,0x11,0x00,0x02,0x00, @@ -1869,414 +1385,10 @@ unsigned char dequant_q2_K_fp32_data[] = { 0x04,0x01,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, }; -const uint64_t dequant_q2_K_fp32_len = 4056; +const uint64_t dequant_q2_K_len = 4056; unsigned char dequant_q3_K_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, 
-0x3f,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, -0x11,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x51,0x11,0x00,0x00,0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00, -0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, -0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, -0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x0f,0x00,0x0a,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x25,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x7b,0x00,0x00,0x00, -0x07,0x01,0x00,0x00,0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x17,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x23,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x23,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x23,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x33,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x71,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x73,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x75,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x77,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x77,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x77,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x48,0x00,0x05,0x00, 
-0x77,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x6c,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x78,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x6e,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x79,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x79,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x79,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x7b,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x7b,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x04,0x01,0x00,0x00, -0x06,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x05,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x05,0x01,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x05,0x01,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x07,0x01,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x07,0x01,0x00,0x00,0x21,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x2c,0x01,0x00,0x00, -0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00, -0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x00,0x01,0x00,0x00,0x14,0x00,0x02,0x00,0x11,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x15,0x00,0x00,0x00, -0x14,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x16,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x15,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x16,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, 
-0x19,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x00,0x01,0x00,0x00,0x1e,0x00,0x06,0x00,0x23,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x24,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x24,0x00,0x00,0x00,0x25,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x26,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x16,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x37,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x3b,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x52,0x00,0x00,0x00,0x08,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x5a,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x5e,0x00,0x00,0x00,0x08,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x70,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x1c,0x00,0x04,0x00,0x71,0x00,0x00,0x00, -0x52,0x00,0x00,0x00,0x70,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x1c,0x00,0x04,0x00,0x73,0x00,0x00,0x00,0x52,0x00,0x00,0x00, -0x72,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0x74,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x1c,0x00,0x04,0x00, -0x75,0x00,0x00,0x00,0x52,0x00,0x00,0x00,0x74,0x00,0x00,0x00, -0x16,0x00,0x03,0x00,0x76,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x1e,0x00,0x06,0x00,0x77,0x00,0x00,0x00,0x71,0x00,0x00,0x00, -0x73,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0x76,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x78,0x00,0x00,0x00,0x77,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x79,0x00,0x00,0x00,0x78,0x00,0x00,0x00, 
-0x20,0x00,0x04,0x00,0x7a,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x79,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x7a,0x00,0x00,0x00, -0x7b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x7f,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x52,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x84,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x8e,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xad,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xd3,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0xe1,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x76,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xe8,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xef,0x00,0x00,0x00, -0x80,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x04,0x01,0x00,0x00, -0x76,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x05,0x01,0x00,0x00, -0x04,0x01,0x00,0x00,0x20,0x00,0x04,0x00,0x06,0x01,0x00,0x00, -0x0c,0x00,0x00,0x00,0x05,0x01,0x00,0x00,0x3b,0x00,0x04,0x00, -0x06,0x01,0x00,0x00,0x07,0x01,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x2b,0x01,0x00,0x00, -0x01,0x00,0x00,0x00,0x2c,0x00,0x06,0x00,0x15,0x00,0x00,0x00, -0x2c,0x01,0x00,0x00,0x72,0x00,0x00,0x00,0x2b,0x01,0x00,0x00, -0x2b,0x01,0x00,0x00,0x2a,0x00,0x03,0x00,0x11,0x00,0x00,0x00, -0x2f,0x01,0x00,0x00,0x29,0x00,0x03,0x00,0x11,0x00,0x00,0x00, -0x32,0x01,0x00,0x00,0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, -0x2d,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00, -0x18,0x00,0x00,0x00,0x2e,0x01,0x00,0x00,0xf8,0x00,0x02,0x00, -0x2e,0x01,0x00,0x00,0xf9,0x00,0x02,0x00,0x0a,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x0a,0x00,0x00,0x00,0xf5,0x00,0x07,0x00, -0x06,0x00,0x00,0x00,0x35,0x01,0x00,0x00,0x09,0x00,0x00,0x00, -0x2e,0x01,0x00,0x00,0x2a,0x01,0x00,0x00,0x0d,0x00,0x00,0x00, 
-0xb1,0x00,0x05,0x00,0x11,0x00,0x00,0x00,0x12,0x00,0x00,0x00, -0x35,0x01,0x00,0x00,0x10,0x00,0x00,0x00,0xf6,0x00,0x04,0x00, -0x0c,0x00,0x00,0x00,0x0d,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0xfa,0x00,0x04,0x00,0x12,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x0b,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x19,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x14,0x00,0x00,0x00,0x1d,0x00,0x00,0x00, -0x1b,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0x1f,0x00,0x00,0x00,0x35,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x14,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x1d,0x00,0x00,0x00,0x1f,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x26,0x00,0x00,0x00,0x27,0x00,0x00,0x00, -0x25,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x28,0x00,0x00,0x00,0x27,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x26,0x00,0x00,0x00,0x2a,0x00,0x00,0x00, -0x25,0x00,0x00,0x00,0x29,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x2b,0x00,0x00,0x00,0x2a,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x2c,0x00,0x00,0x00, -0x28,0x00,0x00,0x00,0x2b,0x00,0x00,0x00,0x87,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x2c,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0xaf,0x00,0x05,0x00,0x11,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0x30,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfa,0x00,0x04,0x00,0x2e,0x00,0x00,0x00,0x2f,0x00,0x00,0x00, -0x30,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x2f,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0x0c,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x30,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x19,0x00,0x00,0x00, -0x34,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x35,0x00,0x00,0x00, 
-0x34,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x36,0x00,0x00,0x00,0x35,0x00,0x00,0x00,0x87,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x36,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x3c,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x3b,0x00,0x00,0x00, -0x8b,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3f,0x00,0x00,0x00, -0x38,0x00,0x00,0x00,0x3b,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x43,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x3f,0x00,0x00,0x00,0x8b,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x47,0x00,0x00,0x00,0x36,0x00,0x00,0x00,0x37,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x48,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x47,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x49,0x00,0x00,0x00,0x43,0x00,0x00,0x00, -0x48,0x00,0x00,0x00,0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x4c,0x00,0x00,0x00,0x3c,0x00,0x00,0x00,0x37,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x50,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x4c,0x00,0x00,0x00,0x82,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x3c,0x00,0x00,0x00, -0x50,0x00,0x00,0x00,0xc4,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x59,0x00,0x00,0x00,0x29,0x00,0x00,0x00,0x3c,0x00,0x00,0x00, -0x72,0x00,0x04,0x00,0x5a,0x00,0x00,0x00,0x5b,0x00,0x00,0x00, -0x59,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x52,0x00,0x00,0x00, -0x5c,0x00,0x00,0x00,0x5b,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x5e,0x00,0x00,0x00, -0x4c,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x62,0x00,0x00,0x00,0x3b,0x00,0x00,0x00,0x51,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x63,0x00,0x00,0x00, -0x60,0x00,0x00,0x00,0x62,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x63,0x00,0x00,0x00, -0x3f,0x00,0x00,0x00,0xb1,0x00,0x05,0x00,0x11,0x00,0x00,0x00, -0x6c,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x37,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0x6f,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 
-0xfa,0x00,0x04,0x00,0x6c,0x00,0x00,0x00,0x6e,0x00,0x00,0x00, -0x92,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x6e,0x00,0x00,0x00, -0x82,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x7e,0x00,0x00,0x00, -0x65,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x7f,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x7b,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x3b,0x00,0x00,0x00, -0x7e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x52,0x00,0x00,0x00, -0x81,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0x82,0x00,0x00,0x00,0x81,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x83,0x00,0x00,0x00, -0x82,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x85,0x00,0x00,0x00,0x83,0x00,0x00,0x00,0x84,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x88,0x00,0x00,0x00, -0x65,0x00,0x00,0x00,0x5e,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x7f,0x00,0x00,0x00,0x89,0x00,0x00,0x00,0x7b,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x3b,0x00,0x00,0x00, -0x88,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x52,0x00,0x00,0x00, -0x8a,0x00,0x00,0x00,0x89,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x52,0x00,0x00,0x00,0x8b,0x00,0x00,0x00,0x8a,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0x8c,0x00,0x00,0x00,0x8b,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x8d,0x00,0x00,0x00,0x8c,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x8f,0x00,0x00,0x00, -0x8d,0x00,0x00,0x00,0x8e,0x00,0x00,0x00,0xc4,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x90,0x00,0x00,0x00,0x8f,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x91,0x00,0x00,0x00,0x85,0x00,0x00,0x00,0x90,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0x6f,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x92,0x00,0x00,0x00,0xb1,0x00,0x05,0x00,0x11,0x00,0x00,0x00, -0x94,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x5e,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0x97,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfa,0x00,0x04,0x00,0x94,0x00,0x00,0x00,0x96,0x00,0x00,0x00, 
-0xab,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x96,0x00,0x00,0x00, -0x82,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x9a,0x00,0x00,0x00, -0x65,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x7f,0x00,0x00,0x00,0x9b,0x00,0x00,0x00,0x7b,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x3b,0x00,0x00,0x00, -0x9a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x52,0x00,0x00,0x00, -0x9c,0x00,0x00,0x00,0x9b,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0x9d,0x00,0x00,0x00,0x9c,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x9e,0x00,0x00,0x00, -0x9d,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x9f,0x00,0x00,0x00,0x9e,0x00,0x00,0x00,0x84,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xa2,0x00,0x00,0x00, -0x65,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x7f,0x00,0x00,0x00,0xa3,0x00,0x00,0x00,0x7b,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x3b,0x00,0x00,0x00, -0xa2,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x52,0x00,0x00,0x00, -0xa4,0x00,0x00,0x00,0xa3,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x52,0x00,0x00,0x00,0xa5,0x00,0x00,0x00,0xa4,0x00,0x00,0x00, -0x3b,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0xa6,0x00,0x00,0x00,0xa5,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xa7,0x00,0x00,0x00,0xa6,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xa8,0x00,0x00,0x00, -0xa7,0x00,0x00,0x00,0x8e,0x00,0x00,0x00,0xc4,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xa9,0x00,0x00,0x00,0xa8,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xaa,0x00,0x00,0x00,0x9f,0x00,0x00,0x00,0xa9,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0x97,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0xab,0x00,0x00,0x00,0xb1,0x00,0x05,0x00,0x11,0x00,0x00,0x00, -0xae,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0xad,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0xb1,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfa,0x00,0x04,0x00,0xae,0x00,0x00,0x00,0xb0,0x00,0x00,0x00, -0xc5,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0xb0,0x00,0x00,0x00, 
-0x82,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xb4,0x00,0x00,0x00, -0x65,0x00,0x00,0x00,0x5e,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x7f,0x00,0x00,0x00,0xb5,0x00,0x00,0x00,0x7b,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x3b,0x00,0x00,0x00, -0xb4,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x52,0x00,0x00,0x00, -0xb6,0x00,0x00,0x00,0xb5,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x52,0x00,0x00,0x00,0xb7,0x00,0x00,0x00,0xb6,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0xb8,0x00,0x00,0x00,0xb7,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xb9,0x00,0x00,0x00,0xb8,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x7f,0x00,0x00,0x00,0xbd,0x00,0x00,0x00, -0x7b,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x3b,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x52,0x00,0x00,0x00,0xbe,0x00,0x00,0x00,0xbd,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x52,0x00,0x00,0x00,0xbf,0x00,0x00,0x00, -0xbe,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0xc0,0x00,0x00,0x00,0xbf,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xc1,0x00,0x00,0x00, -0xc0,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xc2,0x00,0x00,0x00,0xc1,0x00,0x00,0x00,0x8e,0x00,0x00,0x00, -0xc4,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xc3,0x00,0x00,0x00, -0xc2,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xc4,0x00,0x00,0x00,0xb9,0x00,0x00,0x00, -0xc3,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0xb1,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0xc5,0x00,0x00,0x00,0x82,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xc8,0x00,0x00,0x00,0x65,0x00,0x00,0x00, -0x5e,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x7f,0x00,0x00,0x00, -0xc9,0x00,0x00,0x00,0x7b,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x3b,0x00,0x00,0x00,0xc8,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x52,0x00,0x00,0x00,0xca,0x00,0x00,0x00, -0xc9,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x52,0x00,0x00,0x00, -0xcb,0x00,0x00,0x00,0xca,0x00,0x00,0x00,0x37,0x00,0x00,0x00, 
-0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0xcc,0x00,0x00,0x00, -0xcb,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xcd,0x00,0x00,0x00,0xcc,0x00,0x00,0x00,0x82,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xd0,0x00,0x00,0x00,0x65,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x7f,0x00,0x00,0x00, -0xd1,0x00,0x00,0x00,0x7b,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x3b,0x00,0x00,0x00,0xd0,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x52,0x00,0x00,0x00,0xd2,0x00,0x00,0x00, -0xd1,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x52,0x00,0x00,0x00, -0xd4,0x00,0x00,0x00,0xd2,0x00,0x00,0x00,0xd3,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0xd5,0x00,0x00,0x00, -0xd4,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xd6,0x00,0x00,0x00,0xd5,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xd7,0x00,0x00,0x00,0xd6,0x00,0x00,0x00, -0x8e,0x00,0x00,0x00,0xc4,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xd8,0x00,0x00,0x00,0xd7,0x00,0x00,0x00,0x37,0x00,0x00,0x00, -0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xd9,0x00,0x00,0x00, -0xcd,0x00,0x00,0x00,0xd8,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0xb1,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0xb1,0x00,0x00,0x00, -0xf5,0x00,0x07,0x00,0x06,0x00,0x00,0x00,0x36,0x01,0x00,0x00, -0xc4,0x00,0x00,0x00,0xb0,0x00,0x00,0x00,0xd9,0x00,0x00,0x00, -0xc5,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x97,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x97,0x00,0x00,0x00,0xf5,0x00,0x07,0x00, -0x06,0x00,0x00,0x00,0x37,0x01,0x00,0x00,0xaa,0x00,0x00,0x00, -0x96,0x00,0x00,0x00,0x36,0x01,0x00,0x00,0xb1,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0x6f,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x6f,0x00,0x00,0x00,0xf5,0x00,0x07,0x00,0x06,0x00,0x00,0x00, -0x38,0x01,0x00,0x00,0x91,0x00,0x00,0x00,0x6e,0x00,0x00,0x00, -0x37,0x01,0x00,0x00,0x97,0x00,0x00,0x00,0x72,0x00,0x04,0x00, -0x5a,0x00,0x00,0x00,0xdd,0x00,0x00,0x00,0x38,0x01,0x00,0x00, -0x41,0x00,0x07,0x00,0xe1,0x00,0x00,0x00,0xe2,0x00,0x00,0x00, -0x7b,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00, 
-0x8e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x76,0x00,0x00,0x00, -0xe3,0x00,0x00,0x00,0xe2,0x00,0x00,0x00,0x72,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xe7,0x00,0x00,0x00,0xdd,0x00,0x00,0x00, -0x82,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xe9,0x00,0x00,0x00, -0xe7,0x00,0x00,0x00,0xe8,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x76,0x00,0x00,0x00,0xea,0x00,0x00,0x00,0xe9,0x00,0x00,0x00, -0x85,0x00,0x05,0x00,0x76,0x00,0x00,0x00,0xeb,0x00,0x00,0x00, -0xe3,0x00,0x00,0x00,0xea,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xee,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xf1,0x00,0x00,0x00,0xef,0x00,0x00,0x00,0x4c,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xf2,0x00,0x00,0x00, -0xee,0x00,0x00,0x00,0xf1,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xf4,0x00,0x00,0x00,0xe8,0x00,0x00,0x00, -0x51,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xf5,0x00,0x00,0x00,0xf2,0x00,0x00,0x00,0xf4,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xf8,0x00,0x00,0x00, -0xe8,0x00,0x00,0x00,0x4c,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0xfb,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0xfb,0x00,0x00,0x00, -0xf5,0x00,0x07,0x00,0x06,0x00,0x00,0x00,0x39,0x01,0x00,0x00, -0x49,0x00,0x00,0x00,0x6f,0x00,0x00,0x00,0x28,0x01,0x00,0x00, -0xfc,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x02,0x01,0x00,0x00,0x49,0x00,0x00,0x00,0x37,0x00,0x00,0x00, -0xb1,0x00,0x05,0x00,0x11,0x00,0x00,0x00,0x03,0x01,0x00,0x00, -0x39,0x01,0x00,0x00,0x02,0x01,0x00,0x00,0xf6,0x00,0x04,0x00, -0xfd,0x00,0x00,0x00,0xfc,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfa,0x00,0x04,0x00,0x03,0x01,0x00,0x00,0xfc,0x00,0x00,0x00, -0xfd,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0xfc,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x0a,0x01,0x00,0x00, -0xf5,0x00,0x00,0x00,0x39,0x01,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x0f,0x01,0x00,0x00,0xf8,0x00,0x00,0x00, -0x39,0x01,0x00,0x00,0x41,0x00,0x08,0x00,0x7f,0x00,0x00,0x00, 
-0x10,0x01,0x00,0x00,0x7b,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x29,0x00,0x00,0x00,0x0f,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x52,0x00,0x00,0x00,0x11,0x01,0x00,0x00, -0x10,0x01,0x00,0x00,0xc2,0x00,0x05,0x00,0x52,0x00,0x00,0x00, -0x13,0x01,0x00,0x00,0x11,0x01,0x00,0x00,0x62,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x14,0x01,0x00,0x00, -0x13,0x01,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x15,0x01,0x00,0x00,0x14,0x01,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x16,0x01,0x00,0x00,0x15,0x01,0x00,0x00, -0x8e,0x00,0x00,0x00,0x72,0x00,0x04,0x00,0x5a,0x00,0x00,0x00, -0x17,0x01,0x00,0x00,0x16,0x01,0x00,0x00,0x72,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x18,0x01,0x00,0x00,0x17,0x01,0x00,0x00, -0x41,0x00,0x08,0x00,0x7f,0x00,0x00,0x00,0x1b,0x01,0x00,0x00, -0x7b,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x39,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x52,0x00,0x00,0x00,0x1c,0x01,0x00,0x00,0x1b,0x01,0x00,0x00, -0xc7,0x00,0x05,0x00,0x52,0x00,0x00,0x00,0x1e,0x01,0x00,0x00, -0x1c,0x01,0x00,0x00,0x5c,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0x1f,0x01,0x00,0x00,0x1e,0x01,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x20,0x01,0x00,0x00, -0x1f,0x01,0x00,0x00,0xab,0x00,0x05,0x00,0x11,0x00,0x00,0x00, -0x21,0x01,0x00,0x00,0x20,0x01,0x00,0x00,0x09,0x00,0x00,0x00, -0xa9,0x00,0x06,0x00,0x06,0x00,0x00,0x00,0x22,0x01,0x00,0x00, -0x21,0x01,0x00,0x00,0x09,0x00,0x00,0x00,0x37,0x00,0x00,0x00, -0x82,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x23,0x01,0x00,0x00, -0x18,0x01,0x00,0x00,0x22,0x01,0x00,0x00,0x6f,0x00,0x04,0x00, -0x76,0x00,0x00,0x00,0x24,0x01,0x00,0x00,0x23,0x01,0x00,0x00, -0x85,0x00,0x05,0x00,0x76,0x00,0x00,0x00,0x25,0x01,0x00,0x00, -0xeb,0x00,0x00,0x00,0x24,0x01,0x00,0x00,0x41,0x00,0x06,0x00, -0xe1,0x00,0x00,0x00,0x26,0x01,0x00,0x00,0x07,0x01,0x00,0x00, -0x09,0x00,0x00,0x00,0x0a,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0x26,0x01,0x00,0x00,0x25,0x01,0x00,0x00,0x80,0x00,0x05,0x00, 
-0x06,0x00,0x00,0x00,0x28,0x01,0x00,0x00,0x39,0x01,0x00,0x00, -0x29,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0xfb,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0xfd,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0x0d,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x0d,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x2a,0x01,0x00,0x00, -0x35,0x01,0x00,0x00,0x29,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0x0a,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x0c,0x00,0x00,0x00, -0xf5,0x00,0x07,0x00,0x11,0x00,0x00,0x00,0x3e,0x01,0x00,0x00, -0x2f,0x01,0x00,0x00,0x0a,0x00,0x00,0x00,0x32,0x01,0x00,0x00, -0x2f,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x33,0x01,0x00,0x00, -0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x3e,0x01,0x00,0x00, -0x2d,0x01,0x00,0x00,0x33,0x01,0x00,0x00,0xf8,0x00,0x02,0x00, -0x33,0x01,0x00,0x00,0xf9,0x00,0x02,0x00,0x2d,0x01,0x00,0x00, -0xf8,0x00,0x02,0x00,0x2d,0x01,0x00,0x00,0xfd,0x00,0x01,0x00, -0x38,0x00,0x01,0x00, -}; -const uint64_t dequant_q3_K_len = 4792; - -unsigned char dequant_q3_K_fp32_data[] = { -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, 0x42,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x27,0x00,0x00,0x00, 0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00,0x11,0x00,0x02,0x00, @@ -2680,709 +1792,10 @@ unsigned char dequant_q3_K_fp32_data[] = { 0xf8,0x00,0x02,0x00,0x30,0x01,0x00,0x00,0xfd,0x00,0x01,0x00, 0x38,0x00,0x01,0x00, }; -const uint64_t dequant_q3_K_fp32_len = 4828; +const uint64_t dequant_q3_K_len = 4828; unsigned char dequant_q4_0_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0xf7,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, -0x11,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x51,0x11,0x00,0x00,0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00, -0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, -0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, -0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, 
-0x0f,0x00,0x09,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x77,0x00,0x00,0x00, -0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x00,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x0c,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x14,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x14,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x14,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x14,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x14,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x4f,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x50,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x50,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x51,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x12,0x00,0x00,0x00, -0x48,0x00,0x04,0x00,0x52,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x52,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x52,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x54,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x54,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x74,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x48,0x00,0x04,0x00,0x75,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x75,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x75,0x00,0x00,0x00,0x02,0x00,0x00,0x00, 
-0x47,0x00,0x04,0x00,0x77,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x77,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x97,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00, -0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x0a,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x0b,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x0a,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x0b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x0d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x0e,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x1e,0x00,0x06,0x00,0x14,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x15,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x14,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x15,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x18,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x1b,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x14,0x00,0x02,0x00, -0x24,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x48,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x16,0x00,0x03,0x00,0x4a,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x4e,0x00,0x00,0x00,0x10,0x00,0x00,0x00, 
-0x1c,0x00,0x04,0x00,0x4f,0x00,0x00,0x00,0x4d,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x1e,0x00,0x04,0x00,0x50,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, -0x51,0x00,0x00,0x00,0x50,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0x52,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x53,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x52,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x54,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x56,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x5d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x4d,0x00,0x00,0x00, -0x17,0x00,0x04,0x00,0x60,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x66,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x6a,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x6f,0x00,0x00,0x00, -0x00,0x48,0x00,0x00,0x1d,0x00,0x03,0x00,0x74,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x75,0x00,0x00,0x00, -0x74,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x76,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x76,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x79,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x90,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x96,0x00,0x00,0x00,0x00,0x01,0x00,0x00, -0x2c,0x00,0x06,0x00,0x0a,0x00,0x00,0x00,0x97,0x00,0x00,0x00, -0x96,0x00,0x00,0x00,0x90,0x00,0x00,0x00,0x90,0x00,0x00,0x00, -0x2c,0x00,0x05,0x00,0x60,0x00,0x00,0x00,0xa2,0x00,0x00,0x00, -0x6f,0x00,0x00,0x00,0x6f,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xde,0x02,0x00,0x00,0x11,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xdf,0x02,0x00,0x00, -0x12,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xe0,0x02,0x00,0x00,0x13,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, 
-0x06,0x00,0x00,0x00,0xe1,0x02,0x00,0x00,0x14,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xe2,0x02,0x00,0x00, -0x05,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xe3,0x02,0x00,0x00,0x15,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xe4,0x02,0x00,0x00,0x06,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xe5,0x02,0x00,0x00, -0x16,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xe6,0x02,0x00,0x00,0x07,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xe7,0x02,0x00,0x00,0x17,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xe8,0x02,0x00,0x00, -0x08,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xe9,0x02,0x00,0x00,0x18,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xea,0x02,0x00,0x00,0x09,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xeb,0x02,0x00,0x00, -0x19,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xec,0x02,0x00,0x00,0x0a,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xed,0x02,0x00,0x00,0x1a,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xee,0x02,0x00,0x00, -0x0b,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xef,0x02,0x00,0x00,0x1b,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xf0,0x02,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xf1,0x02,0x00,0x00, -0x1c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xf2,0x02,0x00,0x00,0x0d,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xf3,0x02,0x00,0x00,0x1d,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xf4,0x02,0x00,0x00, -0x0e,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xf5,0x02,0x00,0x00,0x1e,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xf6,0x02,0x00,0x00,0x1f,0x00,0x00,0x00, -0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x98,0x00,0x00,0x00, 
-0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00,0x0d,0x00,0x00,0x00, -0x99,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x99,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x0e,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x0d,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x18,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x1b,0x00,0x00,0x00, -0x8b,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x1d,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x87,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x26,0x00,0x00,0x00,0x1d,0x00,0x00,0x00,0x1b,0x00,0x00,0x00, -0xaf,0x00,0x05,0x00,0x24,0x00,0x00,0x00,0x29,0x00,0x00,0x00, -0x26,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0xa8,0x00,0x04,0x00, -0x24,0x00,0x00,0x00,0x2a,0x00,0x00,0x00,0x29,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0x2c,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfa,0x00,0x04,0x00,0x2a,0x00,0x00,0x00,0x2b,0x00,0x00,0x00, -0x2c,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x2b,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x18,0x00,0x00,0x00,0x2f,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x30,0x00,0x00,0x00,0x2f,0x00,0x00,0x00, -0xaf,0x00,0x05,0x00,0x24,0x00,0x00,0x00,0x31,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x30,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0x2c,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x2c,0x00,0x00,0x00, -0xf5,0x00,0x07,0x00,0x24,0x00,0x00,0x00,0x32,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0x99,0x00,0x00,0x00,0x31,0x00,0x00,0x00, -0x2b,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x34,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x32,0x00,0x00,0x00, 
-0x33,0x00,0x00,0x00,0x34,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x33,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x98,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x34,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x18,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x39,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x87,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x39,0x00,0x00,0x00, -0x1b,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x3e,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x3e,0x00,0x00,0x00,0x1d,0x00,0x00,0x00,0x41,0x00,0x07,0x00, -0x56,0x00,0x00,0x00,0x57,0x00,0x00,0x00,0x54,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x58,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00, -0x5e,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0x5f,0x00,0x00,0x00, -0x5e,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x64,0x00,0x00,0x00,0x5f,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x64,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x67,0x00,0x00,0x00, -0x65,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x68,0x00,0x00,0x00,0x67,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00,0x6b,0x00,0x00,0x00, -0x5f,0x00,0x00,0x00,0x6a,0x00,0x00,0x00,0x70,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x6c,0x00,0x00,0x00,0x6b,0x00,0x00,0x00, -0x50,0x00,0x05,0x00,0x60,0x00,0x00,0x00,0x6d,0x00,0x00,0x00, -0x68,0x00,0x00,0x00,0x6c,0x00,0x00,0x00,0x83,0x00,0x05,0x00, -0x60,0x00,0x00,0x00,0x71,0x00,0x00,0x00,0x6d,0x00,0x00,0x00, -0xa2,0x00,0x00,0x00,0x8e,0x00,0x05,0x00,0x60,0x00,0x00,0x00, -0x73,0x00,0x00,0x00,0x71,0x00,0x00,0x00,0x58,0x00,0x00,0x00, 
-0x41,0x00,0x05,0x00,0x18,0x00,0x00,0x00,0x7a,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x79,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x7b,0x00,0x00,0x00,0x7a,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x7c,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x7b,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x7f,0x00,0x00,0x00,0x7c,0x00,0x00,0x00, -0x26,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0x84,0x00,0x00,0x00,0x73,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0x85,0x00,0x00,0x00, -0x77,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x7f,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0x85,0x00,0x00,0x00,0x84,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x8f,0x00,0x00,0x00, -0x7f,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x92,0x00,0x00,0x00,0x73,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0x93,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x8f,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0x93,0x00,0x00,0x00, -0x92,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xa9,0x00,0x00,0x00,0x57,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x5d,0x00,0x00,0x00,0xaa,0x00,0x00,0x00,0x54,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0xab,0x00,0x00,0x00,0xaa,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0xac,0x00,0x00,0x00,0xab,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xad,0x00,0x00,0x00, -0xac,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xae,0x00,0x00,0x00,0xad,0x00,0x00,0x00,0x66,0x00,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xaf,0x00,0x00,0x00, -0xae,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00, -0xb0,0x00,0x00,0x00,0xab,0x00,0x00,0x00,0x6a,0x00,0x00,0x00, -0x70,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xb1,0x00,0x00,0x00, -0xb0,0x00,0x00,0x00,0x50,0x00,0x05,0x00,0x60,0x00,0x00,0x00, 
-0xb2,0x00,0x00,0x00,0xaf,0x00,0x00,0x00,0xb1,0x00,0x00,0x00, -0x83,0x00,0x05,0x00,0x60,0x00,0x00,0x00,0xb3,0x00,0x00,0x00, -0xb2,0x00,0x00,0x00,0xa2,0x00,0x00,0x00,0x8e,0x00,0x05,0x00, -0x60,0x00,0x00,0x00,0xb4,0x00,0x00,0x00,0xb3,0x00,0x00,0x00, -0xa9,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xba,0x00,0x00,0x00,0x7f,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0xbc,0x00,0x00,0x00, -0xb4,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0xbd,0x00,0x00,0x00,0x77,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xba,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0xbd,0x00,0x00,0x00,0xbc,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xc4,0x00,0x00,0x00,0x7f,0x00,0x00,0x00, -0xde,0x02,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0xc5,0x00,0x00,0x00,0xb4,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0xc6,0x00,0x00,0x00, -0x77,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xc4,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0xc6,0x00,0x00,0x00,0xc5,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xcf,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00, -0xd0,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x37,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0xd1,0x00,0x00,0x00, -0xd0,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0xd2,0x00,0x00,0x00,0xd1,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xd3,0x00,0x00,0x00,0xd2,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xd4,0x00,0x00,0x00, -0xd3,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xd5,0x00,0x00,0x00,0xd4,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00,0xd6,0x00,0x00,0x00, -0xd1,0x00,0x00,0x00,0x6a,0x00,0x00,0x00,0x70,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xd7,0x00,0x00,0x00,0xd6,0x00,0x00,0x00, -0x50,0x00,0x05,0x00,0x60,0x00,0x00,0x00,0xd8,0x00,0x00,0x00, 
-0xd5,0x00,0x00,0x00,0xd7,0x00,0x00,0x00,0x83,0x00,0x05,0x00, -0x60,0x00,0x00,0x00,0xd9,0x00,0x00,0x00,0xd8,0x00,0x00,0x00, -0xa2,0x00,0x00,0x00,0x8e,0x00,0x05,0x00,0x60,0x00,0x00,0x00, -0xda,0x00,0x00,0x00,0xd9,0x00,0x00,0x00,0xcf,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xe0,0x00,0x00,0x00, -0x7f,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0xe2,0x00,0x00,0x00,0xda,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0xe3,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xe0,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0xe3,0x00,0x00,0x00, -0xe2,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xea,0x00,0x00,0x00,0x7f,0x00,0x00,0x00,0xdf,0x02,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0xeb,0x00,0x00,0x00, -0xda,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0xec,0x00,0x00,0x00,0x77,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xea,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0xec,0x00,0x00,0x00,0xeb,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xf5,0x00,0x00,0x00,0x57,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00,0xf6,0x00,0x00,0x00, -0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x79,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0xf7,0x00,0x00,0x00,0xf6,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0xf8,0x00,0x00,0x00, -0xf7,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xf9,0x00,0x00,0x00,0xf8,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xfa,0x00,0x00,0x00,0xf9,0x00,0x00,0x00, -0x66,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xfb,0x00,0x00,0x00,0xfa,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x4d,0x00,0x00,0x00,0xfc,0x00,0x00,0x00,0xf7,0x00,0x00,0x00, -0x6a,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xfd,0x00,0x00,0x00,0xfc,0x00,0x00,0x00,0x50,0x00,0x05,0x00, -0x60,0x00,0x00,0x00,0xfe,0x00,0x00,0x00,0xfb,0x00,0x00,0x00, 
-0xfd,0x00,0x00,0x00,0x83,0x00,0x05,0x00,0x60,0x00,0x00,0x00, -0xff,0x00,0x00,0x00,0xfe,0x00,0x00,0x00,0xa2,0x00,0x00,0x00, -0x8e,0x00,0x05,0x00,0x60,0x00,0x00,0x00,0x00,0x01,0x00,0x00, -0xff,0x00,0x00,0x00,0xf5,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x06,0x01,0x00,0x00,0x7f,0x00,0x00,0x00, -0x79,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0x08,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0x09,0x01,0x00,0x00, -0x77,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x06,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0x09,0x01,0x00,0x00,0x08,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x10,0x01,0x00,0x00, -0x7f,0x00,0x00,0x00,0xe0,0x02,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x11,0x01,0x00,0x00,0x00,0x01,0x00,0x00, -0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0x12,0x01,0x00,0x00,0x77,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x10,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0x12,0x01,0x00,0x00, -0x11,0x01,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x1b,0x01,0x00,0x00,0x57,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x5d,0x00,0x00,0x00,0x1c,0x01,0x00,0x00,0x54,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x6a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0x1d,0x01,0x00,0x00,0x1c,0x01,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x1e,0x01,0x00,0x00,0x1d,0x01,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x1f,0x01,0x00,0x00, -0x1e,0x01,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x20,0x01,0x00,0x00,0x1f,0x01,0x00,0x00,0x66,0x00,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x21,0x01,0x00,0x00, -0x20,0x01,0x00,0x00,0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00, -0x22,0x01,0x00,0x00,0x1d,0x01,0x00,0x00,0x6a,0x00,0x00,0x00, -0x70,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x23,0x01,0x00,0x00, -0x22,0x01,0x00,0x00,0x50,0x00,0x05,0x00,0x60,0x00,0x00,0x00, -0x24,0x01,0x00,0x00,0x21,0x01,0x00,0x00,0x23,0x01,0x00,0x00, 
-0x83,0x00,0x05,0x00,0x60,0x00,0x00,0x00,0x25,0x01,0x00,0x00, -0x24,0x01,0x00,0x00,0xa2,0x00,0x00,0x00,0x8e,0x00,0x05,0x00, -0x60,0x00,0x00,0x00,0x26,0x01,0x00,0x00,0x25,0x01,0x00,0x00, -0x1b,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x2c,0x01,0x00,0x00,0x7f,0x00,0x00,0x00,0x6a,0x00,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x2e,0x01,0x00,0x00, -0x26,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0x2f,0x01,0x00,0x00,0x77,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x2c,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0x2f,0x01,0x00,0x00,0x2e,0x01,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x36,0x01,0x00,0x00,0x7f,0x00,0x00,0x00, -0xe1,0x02,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0x37,0x01,0x00,0x00,0x26,0x01,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0x38,0x01,0x00,0x00, -0x77,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x36,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0x38,0x01,0x00,0x00,0x37,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x41,0x01,0x00,0x00, -0x57,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00, -0x42,0x01,0x00,0x00,0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0xe2,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0x43,0x01,0x00,0x00, -0x42,0x01,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x44,0x01,0x00,0x00,0x43,0x01,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x45,0x01,0x00,0x00,0x44,0x01,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x46,0x01,0x00,0x00, -0x45,0x01,0x00,0x00,0x66,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x47,0x01,0x00,0x00,0x46,0x01,0x00,0x00, -0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00,0x48,0x01,0x00,0x00, -0x43,0x01,0x00,0x00,0x6a,0x00,0x00,0x00,0x70,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x49,0x01,0x00,0x00,0x48,0x01,0x00,0x00, -0x50,0x00,0x05,0x00,0x60,0x00,0x00,0x00,0x4a,0x01,0x00,0x00, -0x47,0x01,0x00,0x00,0x49,0x01,0x00,0x00,0x83,0x00,0x05,0x00, 
-0x60,0x00,0x00,0x00,0x4b,0x01,0x00,0x00,0x4a,0x01,0x00,0x00, -0xa2,0x00,0x00,0x00,0x8e,0x00,0x05,0x00,0x60,0x00,0x00,0x00, -0x4c,0x01,0x00,0x00,0x4b,0x01,0x00,0x00,0x41,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x52,0x01,0x00,0x00, -0x7f,0x00,0x00,0x00,0xe2,0x02,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x54,0x01,0x00,0x00,0x4c,0x01,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0x55,0x01,0x00,0x00,0x77,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x52,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0x55,0x01,0x00,0x00, -0x54,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x5c,0x01,0x00,0x00,0x7f,0x00,0x00,0x00,0xe3,0x02,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x5d,0x01,0x00,0x00, -0x4c,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0x5e,0x01,0x00,0x00,0x77,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x5c,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0x5e,0x01,0x00,0x00,0x5d,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x67,0x01,0x00,0x00,0x57,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00,0x68,0x01,0x00,0x00, -0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0xe4,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0x69,0x01,0x00,0x00,0x68,0x01,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x6a,0x01,0x00,0x00, -0x69,0x01,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x6b,0x01,0x00,0x00,0x6a,0x01,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x6c,0x01,0x00,0x00,0x6b,0x01,0x00,0x00, -0x66,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x6d,0x01,0x00,0x00,0x6c,0x01,0x00,0x00,0xc2,0x00,0x05,0x00, -0x4d,0x00,0x00,0x00,0x6e,0x01,0x00,0x00,0x69,0x01,0x00,0x00, -0x6a,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x6f,0x01,0x00,0x00,0x6e,0x01,0x00,0x00,0x50,0x00,0x05,0x00, -0x60,0x00,0x00,0x00,0x70,0x01,0x00,0x00,0x6d,0x01,0x00,0x00, -0x6f,0x01,0x00,0x00,0x83,0x00,0x05,0x00,0x60,0x00,0x00,0x00, 
-0x71,0x01,0x00,0x00,0x70,0x01,0x00,0x00,0xa2,0x00,0x00,0x00, -0x8e,0x00,0x05,0x00,0x60,0x00,0x00,0x00,0x72,0x01,0x00,0x00, -0x71,0x01,0x00,0x00,0x67,0x01,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x78,0x01,0x00,0x00,0x7f,0x00,0x00,0x00, -0xe4,0x02,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0x7a,0x01,0x00,0x00,0x72,0x01,0x00,0x00,0x00,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0x7b,0x01,0x00,0x00, -0x77,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x78,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0x7b,0x01,0x00,0x00,0x7a,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x82,0x01,0x00,0x00, -0x7f,0x00,0x00,0x00,0xe5,0x02,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x83,0x01,0x00,0x00,0x72,0x01,0x00,0x00, -0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0x84,0x01,0x00,0x00,0x77,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x82,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0x84,0x01,0x00,0x00, -0x83,0x01,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x8d,0x01,0x00,0x00,0x57,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x5d,0x00,0x00,0x00,0x8e,0x01,0x00,0x00,0x54,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0xe6,0x02,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0x8f,0x01,0x00,0x00,0x8e,0x01,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x90,0x01,0x00,0x00,0x8f,0x01,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x91,0x01,0x00,0x00, -0x90,0x01,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x92,0x01,0x00,0x00,0x91,0x01,0x00,0x00,0x66,0x00,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x93,0x01,0x00,0x00, -0x92,0x01,0x00,0x00,0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00, -0x94,0x01,0x00,0x00,0x8f,0x01,0x00,0x00,0x6a,0x00,0x00,0x00, -0x70,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x95,0x01,0x00,0x00, -0x94,0x01,0x00,0x00,0x50,0x00,0x05,0x00,0x60,0x00,0x00,0x00, -0x96,0x01,0x00,0x00,0x93,0x01,0x00,0x00,0x95,0x01,0x00,0x00, -0x83,0x00,0x05,0x00,0x60,0x00,0x00,0x00,0x97,0x01,0x00,0x00, 
-0x96,0x01,0x00,0x00,0xa2,0x00,0x00,0x00,0x8e,0x00,0x05,0x00, -0x60,0x00,0x00,0x00,0x98,0x01,0x00,0x00,0x97,0x01,0x00,0x00, -0x8d,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x9e,0x01,0x00,0x00,0x7f,0x00,0x00,0x00,0xe6,0x02,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0xa0,0x01,0x00,0x00, -0x98,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0xa1,0x01,0x00,0x00,0x77,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x9e,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0xa1,0x01,0x00,0x00,0xa0,0x01,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xa8,0x01,0x00,0x00,0x7f,0x00,0x00,0x00, -0xe7,0x02,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0xa9,0x01,0x00,0x00,0x98,0x01,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0xaa,0x01,0x00,0x00, -0x77,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xa8,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0xaa,0x01,0x00,0x00,0xa9,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xb3,0x01,0x00,0x00, -0x57,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00, -0xb4,0x01,0x00,0x00,0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0xe8,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0xb5,0x01,0x00,0x00, -0xb4,0x01,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0xb6,0x01,0x00,0x00,0xb5,0x01,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xb7,0x01,0x00,0x00,0xb6,0x01,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xb8,0x01,0x00,0x00, -0xb7,0x01,0x00,0x00,0x66,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xb9,0x01,0x00,0x00,0xb8,0x01,0x00,0x00, -0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00,0xba,0x01,0x00,0x00, -0xb5,0x01,0x00,0x00,0x6a,0x00,0x00,0x00,0x70,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xbb,0x01,0x00,0x00,0xba,0x01,0x00,0x00, -0x50,0x00,0x05,0x00,0x60,0x00,0x00,0x00,0xbc,0x01,0x00,0x00, -0xb9,0x01,0x00,0x00,0xbb,0x01,0x00,0x00,0x83,0x00,0x05,0x00, -0x60,0x00,0x00,0x00,0xbd,0x01,0x00,0x00,0xbc,0x01,0x00,0x00, 
-0xa2,0x00,0x00,0x00,0x8e,0x00,0x05,0x00,0x60,0x00,0x00,0x00, -0xbe,0x01,0x00,0x00,0xbd,0x01,0x00,0x00,0xb3,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xc4,0x01,0x00,0x00, -0x7f,0x00,0x00,0x00,0xe8,0x02,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0xc6,0x01,0x00,0x00,0xbe,0x01,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0xc7,0x01,0x00,0x00,0x77,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xc4,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0xc7,0x01,0x00,0x00, -0xc6,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xce,0x01,0x00,0x00,0x7f,0x00,0x00,0x00,0xe9,0x02,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0xcf,0x01,0x00,0x00, -0xbe,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0xd0,0x01,0x00,0x00,0x77,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xce,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0xd0,0x01,0x00,0x00,0xcf,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xd9,0x01,0x00,0x00,0x57,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00,0xda,0x01,0x00,0x00, -0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0xea,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0xdb,0x01,0x00,0x00,0xda,0x01,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0xdc,0x01,0x00,0x00, -0xdb,0x01,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xdd,0x01,0x00,0x00,0xdc,0x01,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xde,0x01,0x00,0x00,0xdd,0x01,0x00,0x00, -0x66,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xdf,0x01,0x00,0x00,0xde,0x01,0x00,0x00,0xc2,0x00,0x05,0x00, -0x4d,0x00,0x00,0x00,0xe0,0x01,0x00,0x00,0xdb,0x01,0x00,0x00, -0x6a,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xe1,0x01,0x00,0x00,0xe0,0x01,0x00,0x00,0x50,0x00,0x05,0x00, -0x60,0x00,0x00,0x00,0xe2,0x01,0x00,0x00,0xdf,0x01,0x00,0x00, -0xe1,0x01,0x00,0x00,0x83,0x00,0x05,0x00,0x60,0x00,0x00,0x00, -0xe3,0x01,0x00,0x00,0xe2,0x01,0x00,0x00,0xa2,0x00,0x00,0x00, 
-0x8e,0x00,0x05,0x00,0x60,0x00,0x00,0x00,0xe4,0x01,0x00,0x00, -0xe3,0x01,0x00,0x00,0xd9,0x01,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xea,0x01,0x00,0x00,0x7f,0x00,0x00,0x00, -0xea,0x02,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0xec,0x01,0x00,0x00,0xe4,0x01,0x00,0x00,0x00,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0xed,0x01,0x00,0x00, -0x77,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xea,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0xed,0x01,0x00,0x00,0xec,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xf4,0x01,0x00,0x00, -0x7f,0x00,0x00,0x00,0xeb,0x02,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0xf5,0x01,0x00,0x00,0xe4,0x01,0x00,0x00, -0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0xf6,0x01,0x00,0x00,0x77,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xf4,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0xf6,0x01,0x00,0x00, -0xf5,0x01,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xff,0x01,0x00,0x00,0x57,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x5d,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x54,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0xec,0x02,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0x01,0x02,0x00,0x00,0x00,0x02,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x02,0x02,0x00,0x00,0x01,0x02,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x03,0x02,0x00,0x00, -0x02,0x02,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x04,0x02,0x00,0x00,0x03,0x02,0x00,0x00,0x66,0x00,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x05,0x02,0x00,0x00, -0x04,0x02,0x00,0x00,0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00, -0x06,0x02,0x00,0x00,0x01,0x02,0x00,0x00,0x6a,0x00,0x00,0x00, -0x70,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x07,0x02,0x00,0x00, -0x06,0x02,0x00,0x00,0x50,0x00,0x05,0x00,0x60,0x00,0x00,0x00, -0x08,0x02,0x00,0x00,0x05,0x02,0x00,0x00,0x07,0x02,0x00,0x00, -0x83,0x00,0x05,0x00,0x60,0x00,0x00,0x00,0x09,0x02,0x00,0x00, -0x08,0x02,0x00,0x00,0xa2,0x00,0x00,0x00,0x8e,0x00,0x05,0x00, 
-0x60,0x00,0x00,0x00,0x0a,0x02,0x00,0x00,0x09,0x02,0x00,0x00, -0xff,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x10,0x02,0x00,0x00,0x7f,0x00,0x00,0x00,0xec,0x02,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x12,0x02,0x00,0x00, -0x0a,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0x13,0x02,0x00,0x00,0x77,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x10,0x02,0x00,0x00,0x3e,0x00,0x03,0x00, -0x13,0x02,0x00,0x00,0x12,0x02,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x1a,0x02,0x00,0x00,0x7f,0x00,0x00,0x00, -0xed,0x02,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0x1b,0x02,0x00,0x00,0x0a,0x02,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0x1c,0x02,0x00,0x00, -0x77,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x1a,0x02,0x00,0x00, -0x3e,0x00,0x03,0x00,0x1c,0x02,0x00,0x00,0x1b,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x25,0x02,0x00,0x00, -0x57,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00, -0x26,0x02,0x00,0x00,0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0xee,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0x27,0x02,0x00,0x00, -0x26,0x02,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x28,0x02,0x00,0x00,0x27,0x02,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x29,0x02,0x00,0x00,0x28,0x02,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x2a,0x02,0x00,0x00, -0x29,0x02,0x00,0x00,0x66,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x2b,0x02,0x00,0x00,0x2a,0x02,0x00,0x00, -0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00,0x2c,0x02,0x00,0x00, -0x27,0x02,0x00,0x00,0x6a,0x00,0x00,0x00,0x70,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x2d,0x02,0x00,0x00,0x2c,0x02,0x00,0x00, -0x50,0x00,0x05,0x00,0x60,0x00,0x00,0x00,0x2e,0x02,0x00,0x00, -0x2b,0x02,0x00,0x00,0x2d,0x02,0x00,0x00,0x83,0x00,0x05,0x00, -0x60,0x00,0x00,0x00,0x2f,0x02,0x00,0x00,0x2e,0x02,0x00,0x00, -0xa2,0x00,0x00,0x00,0x8e,0x00,0x05,0x00,0x60,0x00,0x00,0x00, 
-0x30,0x02,0x00,0x00,0x2f,0x02,0x00,0x00,0x25,0x02,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x36,0x02,0x00,0x00, -0x7f,0x00,0x00,0x00,0xee,0x02,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x38,0x02,0x00,0x00,0x30,0x02,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0x39,0x02,0x00,0x00,0x77,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x36,0x02,0x00,0x00,0x3e,0x00,0x03,0x00,0x39,0x02,0x00,0x00, -0x38,0x02,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x40,0x02,0x00,0x00,0x7f,0x00,0x00,0x00,0xef,0x02,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x41,0x02,0x00,0x00, -0x30,0x02,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0x42,0x02,0x00,0x00,0x77,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x02,0x00,0x00,0x3e,0x00,0x03,0x00, -0x42,0x02,0x00,0x00,0x41,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x4b,0x02,0x00,0x00,0x57,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00,0x4c,0x02,0x00,0x00, -0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0xf0,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0x4d,0x02,0x00,0x00,0x4c,0x02,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x4e,0x02,0x00,0x00, -0x4d,0x02,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x4f,0x02,0x00,0x00,0x4e,0x02,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x50,0x02,0x00,0x00,0x4f,0x02,0x00,0x00, -0x66,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x51,0x02,0x00,0x00,0x50,0x02,0x00,0x00,0xc2,0x00,0x05,0x00, -0x4d,0x00,0x00,0x00,0x52,0x02,0x00,0x00,0x4d,0x02,0x00,0x00, -0x6a,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x53,0x02,0x00,0x00,0x52,0x02,0x00,0x00,0x50,0x00,0x05,0x00, -0x60,0x00,0x00,0x00,0x54,0x02,0x00,0x00,0x51,0x02,0x00,0x00, -0x53,0x02,0x00,0x00,0x83,0x00,0x05,0x00,0x60,0x00,0x00,0x00, -0x55,0x02,0x00,0x00,0x54,0x02,0x00,0x00,0xa2,0x00,0x00,0x00, -0x8e,0x00,0x05,0x00,0x60,0x00,0x00,0x00,0x56,0x02,0x00,0x00, 
-0x55,0x02,0x00,0x00,0x4b,0x02,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x5c,0x02,0x00,0x00,0x7f,0x00,0x00,0x00, -0xf0,0x02,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0x5e,0x02,0x00,0x00,0x56,0x02,0x00,0x00,0x00,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0x5f,0x02,0x00,0x00, -0x77,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x5c,0x02,0x00,0x00, -0x3e,0x00,0x03,0x00,0x5f,0x02,0x00,0x00,0x5e,0x02,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x66,0x02,0x00,0x00, -0x7f,0x00,0x00,0x00,0xf1,0x02,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x67,0x02,0x00,0x00,0x56,0x02,0x00,0x00, -0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0x68,0x02,0x00,0x00,0x77,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x66,0x02,0x00,0x00,0x3e,0x00,0x03,0x00,0x68,0x02,0x00,0x00, -0x67,0x02,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x71,0x02,0x00,0x00,0x57,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x5d,0x00,0x00,0x00,0x72,0x02,0x00,0x00,0x54,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0xf2,0x02,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0x73,0x02,0x00,0x00,0x72,0x02,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x74,0x02,0x00,0x00,0x73,0x02,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x75,0x02,0x00,0x00, -0x74,0x02,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x76,0x02,0x00,0x00,0x75,0x02,0x00,0x00,0x66,0x00,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x77,0x02,0x00,0x00, -0x76,0x02,0x00,0x00,0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00, -0x78,0x02,0x00,0x00,0x73,0x02,0x00,0x00,0x6a,0x00,0x00,0x00, -0x70,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x79,0x02,0x00,0x00, -0x78,0x02,0x00,0x00,0x50,0x00,0x05,0x00,0x60,0x00,0x00,0x00, -0x7a,0x02,0x00,0x00,0x77,0x02,0x00,0x00,0x79,0x02,0x00,0x00, -0x83,0x00,0x05,0x00,0x60,0x00,0x00,0x00,0x7b,0x02,0x00,0x00, -0x7a,0x02,0x00,0x00,0xa2,0x00,0x00,0x00,0x8e,0x00,0x05,0x00, -0x60,0x00,0x00,0x00,0x7c,0x02,0x00,0x00,0x7b,0x02,0x00,0x00, 
-0x71,0x02,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x82,0x02,0x00,0x00,0x7f,0x00,0x00,0x00,0xf2,0x02,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x84,0x02,0x00,0x00, -0x7c,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0x85,0x02,0x00,0x00,0x77,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x82,0x02,0x00,0x00,0x3e,0x00,0x03,0x00, -0x85,0x02,0x00,0x00,0x84,0x02,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x8c,0x02,0x00,0x00,0x7f,0x00,0x00,0x00, -0xf3,0x02,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0x8d,0x02,0x00,0x00,0x7c,0x02,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0x8e,0x02,0x00,0x00, -0x77,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x8c,0x02,0x00,0x00, -0x3e,0x00,0x03,0x00,0x8e,0x02,0x00,0x00,0x8d,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x97,0x02,0x00,0x00, -0x57,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00, -0x98,0x02,0x00,0x00,0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0xf4,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0x99,0x02,0x00,0x00, -0x98,0x02,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x9a,0x02,0x00,0x00,0x99,0x02,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x9b,0x02,0x00,0x00,0x9a,0x02,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x9c,0x02,0x00,0x00, -0x9b,0x02,0x00,0x00,0x66,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x9d,0x02,0x00,0x00,0x9c,0x02,0x00,0x00, -0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00,0x9e,0x02,0x00,0x00, -0x99,0x02,0x00,0x00,0x6a,0x00,0x00,0x00,0x70,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x9f,0x02,0x00,0x00,0x9e,0x02,0x00,0x00, -0x50,0x00,0x05,0x00,0x60,0x00,0x00,0x00,0xa0,0x02,0x00,0x00, -0x9d,0x02,0x00,0x00,0x9f,0x02,0x00,0x00,0x83,0x00,0x05,0x00, -0x60,0x00,0x00,0x00,0xa1,0x02,0x00,0x00,0xa0,0x02,0x00,0x00, -0xa2,0x00,0x00,0x00,0x8e,0x00,0x05,0x00,0x60,0x00,0x00,0x00, -0xa2,0x02,0x00,0x00,0xa1,0x02,0x00,0x00,0x97,0x02,0x00,0x00, 
-0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xa8,0x02,0x00,0x00, -0x7f,0x00,0x00,0x00,0xf4,0x02,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0xaa,0x02,0x00,0x00,0xa2,0x02,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0xab,0x02,0x00,0x00,0x77,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xa8,0x02,0x00,0x00,0x3e,0x00,0x03,0x00,0xab,0x02,0x00,0x00, -0xaa,0x02,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xb2,0x02,0x00,0x00,0x7f,0x00,0x00,0x00,0xf5,0x02,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0xb3,0x02,0x00,0x00, -0xa2,0x02,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0xb4,0x02,0x00,0x00,0x77,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xb2,0x02,0x00,0x00,0x3e,0x00,0x03,0x00, -0xb4,0x02,0x00,0x00,0xb3,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xbd,0x02,0x00,0x00,0x57,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00,0xbe,0x02,0x00,0x00, -0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0xbf,0x02,0x00,0x00,0xbe,0x02,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0xc0,0x02,0x00,0x00, -0xbf,0x02,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xc1,0x02,0x00,0x00,0xc0,0x02,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xc2,0x02,0x00,0x00,0xc1,0x02,0x00,0x00, -0x66,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xc3,0x02,0x00,0x00,0xc2,0x02,0x00,0x00,0xc2,0x00,0x05,0x00, -0x4d,0x00,0x00,0x00,0xc4,0x02,0x00,0x00,0xbf,0x02,0x00,0x00, -0x6a,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xc5,0x02,0x00,0x00,0xc4,0x02,0x00,0x00,0x50,0x00,0x05,0x00, -0x60,0x00,0x00,0x00,0xc6,0x02,0x00,0x00,0xc3,0x02,0x00,0x00, -0xc5,0x02,0x00,0x00,0x83,0x00,0x05,0x00,0x60,0x00,0x00,0x00, -0xc7,0x02,0x00,0x00,0xc6,0x02,0x00,0x00,0xa2,0x00,0x00,0x00, -0x8e,0x00,0x05,0x00,0x60,0x00,0x00,0x00,0xc8,0x02,0x00,0x00, -0xc7,0x02,0x00,0x00,0xbd,0x02,0x00,0x00,0x80,0x00,0x05,0x00, 
-0x06,0x00,0x00,0x00,0xce,0x02,0x00,0x00,0x7f,0x00,0x00,0x00, -0x66,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0xd0,0x02,0x00,0x00,0xc8,0x02,0x00,0x00,0x00,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0xd1,0x02,0x00,0x00, -0x77,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xce,0x02,0x00,0x00, -0x3e,0x00,0x03,0x00,0xd1,0x02,0x00,0x00,0xd0,0x02,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xd8,0x02,0x00,0x00, -0x7f,0x00,0x00,0x00,0xf6,0x02,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0xd9,0x02,0x00,0x00,0xc8,0x02,0x00,0x00, -0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0xda,0x02,0x00,0x00,0x77,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xd8,0x02,0x00,0x00,0x3e,0x00,0x03,0x00,0xda,0x02,0x00,0x00, -0xd9,0x02,0x00,0x00,0xf9,0x00,0x02,0x00,0x98,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x98,0x00,0x00,0x00,0xfd,0x00,0x01,0x00, -0x38,0x00,0x01,0x00, -}; -const uint64_t dequant_q4_0_len = 8332; - -unsigned char dequant_q4_0_fp32_data[] = { -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, 0x19,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, 0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00,0x0b,0x00,0x06,0x00, @@ -4122,758 +2535,10 @@ unsigned char dequant_q4_0_fp32_data[] = { 0x9b,0x00,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, }; -const uint64_t dequant_q4_0_fp32_len = 8856; +const uint64_t dequant_q4_0_len = 8856; unsigned char dequant_q4_1_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0x27,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, -0x11,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x51,0x11,0x00,0x00,0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00, -0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, -0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, -0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, 
-0x0f,0x00,0x09,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x7b,0x00,0x00,0x00, -0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x00,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x0c,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x14,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x14,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x14,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x14,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x14,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x4f,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x50,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x50,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x50,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x51,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x52,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x52,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x52,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x54,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x54,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x78,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x79,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x79,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 
-0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x79,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x7b,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x7b,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x9b,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00, -0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x15,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x17,0x00,0x04,0x00,0x0a,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x0b,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x0a,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x0b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x0d,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x0e,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x1e,0x00,0x06,0x00, -0x14,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x15,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x15,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x18,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x1b,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x14,0x00,0x02,0x00,0x24,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x16,0x00,0x03,0x00,0x4a,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0x08,0x00,0x00,0x00, 
-0x00,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x1c,0x00,0x04,0x00, -0x4f,0x00,0x00,0x00,0x4d,0x00,0x00,0x00,0x4e,0x00,0x00,0x00, -0x1e,0x00,0x05,0x00,0x50,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, -0x51,0x00,0x00,0x00,0x50,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0x52,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x53,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x52,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x54,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x56,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x61,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x4d,0x00,0x00,0x00, -0x17,0x00,0x04,0x00,0x64,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x6a,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x6e,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x78,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x79,0x00,0x00,0x00,0x78,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x7a,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x79,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x7a,0x00,0x00,0x00, -0x7b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x7d,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x94,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x9a,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x2c,0x00,0x06,0x00, -0x0a,0x00,0x00,0x00,0x9b,0x00,0x00,0x00,0x9a,0x00,0x00,0x00, -0x94,0x00,0x00,0x00,0x94,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x0e,0x03,0x00,0x00,0x11,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x0f,0x03,0x00,0x00, -0x12,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x10,0x03,0x00,0x00,0x13,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x11,0x03,0x00,0x00,0x14,0x00,0x00,0x00, 
-0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x12,0x03,0x00,0x00, -0x05,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x13,0x03,0x00,0x00,0x15,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x14,0x03,0x00,0x00,0x06,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x15,0x03,0x00,0x00, -0x16,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x16,0x03,0x00,0x00,0x07,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x17,0x03,0x00,0x00,0x17,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x18,0x03,0x00,0x00, -0x08,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x19,0x03,0x00,0x00,0x18,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x1a,0x03,0x00,0x00,0x09,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x1b,0x03,0x00,0x00, -0x19,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x1c,0x03,0x00,0x00,0x0a,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x1d,0x03,0x00,0x00,0x1a,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x1e,0x03,0x00,0x00, -0x0b,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x1f,0x03,0x00,0x00,0x1b,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x20,0x03,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x21,0x03,0x00,0x00, -0x1c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x22,0x03,0x00,0x00,0x0d,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x23,0x03,0x00,0x00,0x1d,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x24,0x03,0x00,0x00, -0x0e,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x25,0x03,0x00,0x00,0x1e,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x26,0x03,0x00,0x00,0x1f,0x00,0x00,0x00, -0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x9c,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00,0x0d,0x00,0x00,0x00, 
-0x9d,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x9d,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x0e,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x0d,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x18,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x1b,0x00,0x00,0x00, -0x8b,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x1d,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x87,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x26,0x00,0x00,0x00,0x1d,0x00,0x00,0x00,0x1b,0x00,0x00,0x00, -0xaf,0x00,0x05,0x00,0x24,0x00,0x00,0x00,0x29,0x00,0x00,0x00, -0x26,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0xa8,0x00,0x04,0x00, -0x24,0x00,0x00,0x00,0x2a,0x00,0x00,0x00,0x29,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0x2c,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfa,0x00,0x04,0x00,0x2a,0x00,0x00,0x00,0x2b,0x00,0x00,0x00, -0x2c,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x2b,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x18,0x00,0x00,0x00,0x2f,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x30,0x00,0x00,0x00,0x2f,0x00,0x00,0x00, -0xaf,0x00,0x05,0x00,0x24,0x00,0x00,0x00,0x31,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x30,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0x2c,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x2c,0x00,0x00,0x00, -0xf5,0x00,0x07,0x00,0x24,0x00,0x00,0x00,0x32,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0x9d,0x00,0x00,0x00,0x31,0x00,0x00,0x00, -0x2b,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x34,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x32,0x00,0x00,0x00, -0x33,0x00,0x00,0x00,0x34,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, 
-0x33,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x9c,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x34,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x18,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x39,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x87,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x39,0x00,0x00,0x00, -0x1b,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x3e,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x3e,0x00,0x00,0x00,0x1d,0x00,0x00,0x00,0x41,0x00,0x07,0x00, -0x56,0x00,0x00,0x00,0x57,0x00,0x00,0x00,0x54,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x58,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x41,0x00,0x07,0x00,0x56,0x00,0x00,0x00, -0x5b,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x5c,0x00,0x00,0x00,0x5b,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x61,0x00,0x00,0x00,0x62,0x00,0x00,0x00, -0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0x63,0x00,0x00,0x00,0x62,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x68,0x00,0x00,0x00, -0x63,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x69,0x00,0x00,0x00,0x68,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x6b,0x00,0x00,0x00,0x69,0x00,0x00,0x00, -0x6a,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x6c,0x00,0x00,0x00,0x6b,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x4d,0x00,0x00,0x00,0x6f,0x00,0x00,0x00,0x63,0x00,0x00,0x00, -0x6e,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x70,0x00,0x00,0x00,0x6f,0x00,0x00,0x00,0x50,0x00,0x05,0x00, -0x64,0x00,0x00,0x00,0x71,0x00,0x00,0x00,0x6c,0x00,0x00,0x00, -0x70,0x00,0x00,0x00,0x8e,0x00,0x05,0x00,0x64,0x00,0x00,0x00, 
-0x74,0x00,0x00,0x00,0x71,0x00,0x00,0x00,0x58,0x00,0x00,0x00, -0x50,0x00,0x05,0x00,0x64,0x00,0x00,0x00,0x76,0x00,0x00,0x00, -0x5c,0x00,0x00,0x00,0x5c,0x00,0x00,0x00,0x81,0x00,0x05,0x00, -0x64,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x74,0x00,0x00,0x00, -0x76,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x18,0x00,0x00,0x00, -0x7e,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x7d,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x7f,0x00,0x00,0x00, -0x7e,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x80,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x7f,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x83,0x00,0x00,0x00, -0x80,0x00,0x00,0x00,0x26,0x00,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x88,0x00,0x00,0x00,0x77,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0x89,0x00,0x00,0x00,0x7b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x83,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0x89,0x00,0x00,0x00, -0x88,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x93,0x00,0x00,0x00,0x83,0x00,0x00,0x00,0x48,0x00,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x96,0x00,0x00,0x00, -0x77,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0x97,0x00,0x00,0x00,0x7b,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x93,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0x97,0x00,0x00,0x00,0x96,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xac,0x00,0x00,0x00,0x57,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xae,0x00,0x00,0x00, -0x5b,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x61,0x00,0x00,0x00, -0xaf,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0xb0,0x00,0x00,0x00, -0xaf,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0xb1,0x00,0x00,0x00,0xb0,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xb2,0x00,0x00,0x00,0xb1,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xb3,0x00,0x00,0x00, 
-0xb2,0x00,0x00,0x00,0x6a,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xb4,0x00,0x00,0x00,0xb3,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00,0xb5,0x00,0x00,0x00, -0xb0,0x00,0x00,0x00,0x6e,0x00,0x00,0x00,0x70,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xb6,0x00,0x00,0x00,0xb5,0x00,0x00,0x00, -0x50,0x00,0x05,0x00,0x64,0x00,0x00,0x00,0xb7,0x00,0x00,0x00, -0xb4,0x00,0x00,0x00,0xb6,0x00,0x00,0x00,0x8e,0x00,0x05,0x00, -0x64,0x00,0x00,0x00,0xb8,0x00,0x00,0x00,0xb7,0x00,0x00,0x00, -0xac,0x00,0x00,0x00,0x50,0x00,0x05,0x00,0x64,0x00,0x00,0x00, -0xb9,0x00,0x00,0x00,0xae,0x00,0x00,0x00,0xae,0x00,0x00,0x00, -0x81,0x00,0x05,0x00,0x64,0x00,0x00,0x00,0xba,0x00,0x00,0x00, -0xb8,0x00,0x00,0x00,0xb9,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xc0,0x00,0x00,0x00,0x83,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0xc2,0x00,0x00,0x00,0xba,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0xc3,0x00,0x00,0x00, -0x7b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xc0,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0xc3,0x00,0x00,0x00,0xc2,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xca,0x00,0x00,0x00, -0x83,0x00,0x00,0x00,0x0e,0x03,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0xcb,0x00,0x00,0x00,0xba,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0xcc,0x00,0x00,0x00,0x7b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xca,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0xcc,0x00,0x00,0x00, -0xcb,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xd5,0x00,0x00,0x00,0x57,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xd7,0x00,0x00,0x00,0x5b,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x61,0x00,0x00,0x00,0xd8,0x00,0x00,0x00, -0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0xd9,0x00,0x00,0x00,0xd8,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0xda,0x00,0x00,0x00, 
-0xd9,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xdb,0x00,0x00,0x00,0xda,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xdc,0x00,0x00,0x00,0xdb,0x00,0x00,0x00, -0x6a,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xdd,0x00,0x00,0x00,0xdc,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x4d,0x00,0x00,0x00,0xde,0x00,0x00,0x00,0xd9,0x00,0x00,0x00, -0x6e,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xdf,0x00,0x00,0x00,0xde,0x00,0x00,0x00,0x50,0x00,0x05,0x00, -0x64,0x00,0x00,0x00,0xe0,0x00,0x00,0x00,0xdd,0x00,0x00,0x00, -0xdf,0x00,0x00,0x00,0x8e,0x00,0x05,0x00,0x64,0x00,0x00,0x00, -0xe1,0x00,0x00,0x00,0xe0,0x00,0x00,0x00,0xd5,0x00,0x00,0x00, -0x50,0x00,0x05,0x00,0x64,0x00,0x00,0x00,0xe2,0x00,0x00,0x00, -0xd7,0x00,0x00,0x00,0xd7,0x00,0x00,0x00,0x81,0x00,0x05,0x00, -0x64,0x00,0x00,0x00,0xe3,0x00,0x00,0x00,0xe1,0x00,0x00,0x00, -0xe2,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xe9,0x00,0x00,0x00,0x83,0x00,0x00,0x00,0x37,0x00,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0xeb,0x00,0x00,0x00, -0xe3,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0xec,0x00,0x00,0x00,0x7b,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xe9,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0xec,0x00,0x00,0x00,0xeb,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xf3,0x00,0x00,0x00,0x83,0x00,0x00,0x00, -0x0f,0x03,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0xf4,0x00,0x00,0x00,0xe3,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0xf5,0x00,0x00,0x00, -0x7b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xf3,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0xf5,0x00,0x00,0x00,0xf4,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xfe,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x00,0x01,0x00,0x00,0x5b,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x61,0x00,0x00,0x00,0x01,0x01,0x00,0x00,0x54,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x37,0x00,0x00,0x00, 
-0x7d,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0x02,0x01,0x00,0x00,0x01,0x01,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x03,0x01,0x00,0x00,0x02,0x01,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x04,0x01,0x00,0x00, -0x03,0x01,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x05,0x01,0x00,0x00,0x04,0x01,0x00,0x00,0x6a,0x00,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x06,0x01,0x00,0x00, -0x05,0x01,0x00,0x00,0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00, -0x07,0x01,0x00,0x00,0x02,0x01,0x00,0x00,0x6e,0x00,0x00,0x00, -0x70,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x08,0x01,0x00,0x00, -0x07,0x01,0x00,0x00,0x50,0x00,0x05,0x00,0x64,0x00,0x00,0x00, -0x09,0x01,0x00,0x00,0x06,0x01,0x00,0x00,0x08,0x01,0x00,0x00, -0x8e,0x00,0x05,0x00,0x64,0x00,0x00,0x00,0x0a,0x01,0x00,0x00, -0x09,0x01,0x00,0x00,0xfe,0x00,0x00,0x00,0x50,0x00,0x05,0x00, -0x64,0x00,0x00,0x00,0x0b,0x01,0x00,0x00,0x00,0x01,0x00,0x00, -0x00,0x01,0x00,0x00,0x81,0x00,0x05,0x00,0x64,0x00,0x00,0x00, -0x0c,0x01,0x00,0x00,0x0a,0x01,0x00,0x00,0x0b,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x12,0x01,0x00,0x00, -0x83,0x00,0x00,0x00,0x7d,0x00,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x14,0x01,0x00,0x00,0x0c,0x01,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0x15,0x01,0x00,0x00,0x7b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x12,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0x15,0x01,0x00,0x00, -0x14,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x1c,0x01,0x00,0x00,0x83,0x00,0x00,0x00,0x10,0x03,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x1d,0x01,0x00,0x00, -0x0c,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0x1e,0x01,0x00,0x00,0x7b,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x1c,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0x1e,0x01,0x00,0x00,0x1d,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x27,0x01,0x00,0x00,0x57,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x29,0x01,0x00,0x00, 
-0x5b,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x61,0x00,0x00,0x00, -0x2a,0x01,0x00,0x00,0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x6e,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0x2b,0x01,0x00,0x00, -0x2a,0x01,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x2c,0x01,0x00,0x00,0x2b,0x01,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x2d,0x01,0x00,0x00,0x2c,0x01,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x2e,0x01,0x00,0x00, -0x2d,0x01,0x00,0x00,0x6a,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x2f,0x01,0x00,0x00,0x2e,0x01,0x00,0x00, -0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00,0x30,0x01,0x00,0x00, -0x2b,0x01,0x00,0x00,0x6e,0x00,0x00,0x00,0x70,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x31,0x01,0x00,0x00,0x30,0x01,0x00,0x00, -0x50,0x00,0x05,0x00,0x64,0x00,0x00,0x00,0x32,0x01,0x00,0x00, -0x2f,0x01,0x00,0x00,0x31,0x01,0x00,0x00,0x8e,0x00,0x05,0x00, -0x64,0x00,0x00,0x00,0x33,0x01,0x00,0x00,0x32,0x01,0x00,0x00, -0x27,0x01,0x00,0x00,0x50,0x00,0x05,0x00,0x64,0x00,0x00,0x00, -0x34,0x01,0x00,0x00,0x29,0x01,0x00,0x00,0x29,0x01,0x00,0x00, -0x81,0x00,0x05,0x00,0x64,0x00,0x00,0x00,0x35,0x01,0x00,0x00, -0x33,0x01,0x00,0x00,0x34,0x01,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3b,0x01,0x00,0x00,0x83,0x00,0x00,0x00, -0x6e,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0x3d,0x01,0x00,0x00,0x35,0x01,0x00,0x00,0x00,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0x3e,0x01,0x00,0x00, -0x7b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x3b,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0x3e,0x01,0x00,0x00,0x3d,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x45,0x01,0x00,0x00, -0x83,0x00,0x00,0x00,0x11,0x03,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x46,0x01,0x00,0x00,0x35,0x01,0x00,0x00, -0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0x47,0x01,0x00,0x00,0x7b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x45,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0x47,0x01,0x00,0x00, 
-0x46,0x01,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x50,0x01,0x00,0x00,0x57,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x52,0x01,0x00,0x00,0x5b,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x61,0x00,0x00,0x00,0x53,0x01,0x00,0x00, -0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x12,0x03,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0x54,0x01,0x00,0x00,0x53,0x01,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x55,0x01,0x00,0x00, -0x54,0x01,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x56,0x01,0x00,0x00,0x55,0x01,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x57,0x01,0x00,0x00,0x56,0x01,0x00,0x00, -0x6a,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x58,0x01,0x00,0x00,0x57,0x01,0x00,0x00,0xc2,0x00,0x05,0x00, -0x4d,0x00,0x00,0x00,0x59,0x01,0x00,0x00,0x54,0x01,0x00,0x00, -0x6e,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x5a,0x01,0x00,0x00,0x59,0x01,0x00,0x00,0x50,0x00,0x05,0x00, -0x64,0x00,0x00,0x00,0x5b,0x01,0x00,0x00,0x58,0x01,0x00,0x00, -0x5a,0x01,0x00,0x00,0x8e,0x00,0x05,0x00,0x64,0x00,0x00,0x00, -0x5c,0x01,0x00,0x00,0x5b,0x01,0x00,0x00,0x50,0x01,0x00,0x00, -0x50,0x00,0x05,0x00,0x64,0x00,0x00,0x00,0x5d,0x01,0x00,0x00, -0x52,0x01,0x00,0x00,0x52,0x01,0x00,0x00,0x81,0x00,0x05,0x00, -0x64,0x00,0x00,0x00,0x5e,0x01,0x00,0x00,0x5c,0x01,0x00,0x00, -0x5d,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x64,0x01,0x00,0x00,0x83,0x00,0x00,0x00,0x12,0x03,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x66,0x01,0x00,0x00, -0x5e,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0x67,0x01,0x00,0x00,0x7b,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x64,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0x67,0x01,0x00,0x00,0x66,0x01,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x6e,0x01,0x00,0x00,0x83,0x00,0x00,0x00, -0x13,0x03,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0x6f,0x01,0x00,0x00,0x5e,0x01,0x00,0x00,0x01,0x00,0x00,0x00, 
-0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0x70,0x01,0x00,0x00, -0x7b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x6e,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0x70,0x01,0x00,0x00,0x6f,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x79,0x01,0x00,0x00, -0x57,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x7b,0x01,0x00,0x00,0x5b,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x61,0x00,0x00,0x00,0x7c,0x01,0x00,0x00,0x54,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x37,0x00,0x00,0x00, -0x14,0x03,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0x7d,0x01,0x00,0x00,0x7c,0x01,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x7e,0x01,0x00,0x00,0x7d,0x01,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x7f,0x01,0x00,0x00, -0x7e,0x01,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x80,0x01,0x00,0x00,0x7f,0x01,0x00,0x00,0x6a,0x00,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x81,0x01,0x00,0x00, -0x80,0x01,0x00,0x00,0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00, -0x82,0x01,0x00,0x00,0x7d,0x01,0x00,0x00,0x6e,0x00,0x00,0x00, -0x70,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x83,0x01,0x00,0x00, -0x82,0x01,0x00,0x00,0x50,0x00,0x05,0x00,0x64,0x00,0x00,0x00, -0x84,0x01,0x00,0x00,0x81,0x01,0x00,0x00,0x83,0x01,0x00,0x00, -0x8e,0x00,0x05,0x00,0x64,0x00,0x00,0x00,0x85,0x01,0x00,0x00, -0x84,0x01,0x00,0x00,0x79,0x01,0x00,0x00,0x50,0x00,0x05,0x00, -0x64,0x00,0x00,0x00,0x86,0x01,0x00,0x00,0x7b,0x01,0x00,0x00, -0x7b,0x01,0x00,0x00,0x81,0x00,0x05,0x00,0x64,0x00,0x00,0x00, -0x87,0x01,0x00,0x00,0x85,0x01,0x00,0x00,0x86,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x8d,0x01,0x00,0x00, -0x83,0x00,0x00,0x00,0x14,0x03,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x8f,0x01,0x00,0x00,0x87,0x01,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0x90,0x01,0x00,0x00,0x7b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x8d,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0x90,0x01,0x00,0x00, -0x8f,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, 
-0x97,0x01,0x00,0x00,0x83,0x00,0x00,0x00,0x15,0x03,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x98,0x01,0x00,0x00, -0x87,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0x99,0x01,0x00,0x00,0x7b,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x97,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0x99,0x01,0x00,0x00,0x98,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xa2,0x01,0x00,0x00,0x57,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xa4,0x01,0x00,0x00, -0x5b,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x61,0x00,0x00,0x00, -0xa5,0x01,0x00,0x00,0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x16,0x03,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0xa6,0x01,0x00,0x00, -0xa5,0x01,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0xa7,0x01,0x00,0x00,0xa6,0x01,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xa8,0x01,0x00,0x00,0xa7,0x01,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xa9,0x01,0x00,0x00, -0xa8,0x01,0x00,0x00,0x6a,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xaa,0x01,0x00,0x00,0xa9,0x01,0x00,0x00, -0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00,0xab,0x01,0x00,0x00, -0xa6,0x01,0x00,0x00,0x6e,0x00,0x00,0x00,0x70,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xac,0x01,0x00,0x00,0xab,0x01,0x00,0x00, -0x50,0x00,0x05,0x00,0x64,0x00,0x00,0x00,0xad,0x01,0x00,0x00, -0xaa,0x01,0x00,0x00,0xac,0x01,0x00,0x00,0x8e,0x00,0x05,0x00, -0x64,0x00,0x00,0x00,0xae,0x01,0x00,0x00,0xad,0x01,0x00,0x00, -0xa2,0x01,0x00,0x00,0x50,0x00,0x05,0x00,0x64,0x00,0x00,0x00, -0xaf,0x01,0x00,0x00,0xa4,0x01,0x00,0x00,0xa4,0x01,0x00,0x00, -0x81,0x00,0x05,0x00,0x64,0x00,0x00,0x00,0xb0,0x01,0x00,0x00, -0xae,0x01,0x00,0x00,0xaf,0x01,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xb6,0x01,0x00,0x00,0x83,0x00,0x00,0x00, -0x16,0x03,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0xb8,0x01,0x00,0x00,0xb0,0x01,0x00,0x00,0x00,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0xb9,0x01,0x00,0x00, 
-0x7b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xb6,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0xb9,0x01,0x00,0x00,0xb8,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xc0,0x01,0x00,0x00, -0x83,0x00,0x00,0x00,0x17,0x03,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0xc1,0x01,0x00,0x00,0xb0,0x01,0x00,0x00, -0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0xc2,0x01,0x00,0x00,0x7b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xc0,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0xc2,0x01,0x00,0x00, -0xc1,0x01,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xcb,0x01,0x00,0x00,0x57,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xcd,0x01,0x00,0x00,0x5b,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x61,0x00,0x00,0x00,0xce,0x01,0x00,0x00, -0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x18,0x03,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0xcf,0x01,0x00,0x00,0xce,0x01,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0xd0,0x01,0x00,0x00, -0xcf,0x01,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xd1,0x01,0x00,0x00,0xd0,0x01,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xd2,0x01,0x00,0x00,0xd1,0x01,0x00,0x00, -0x6a,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xd3,0x01,0x00,0x00,0xd2,0x01,0x00,0x00,0xc2,0x00,0x05,0x00, -0x4d,0x00,0x00,0x00,0xd4,0x01,0x00,0x00,0xcf,0x01,0x00,0x00, -0x6e,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xd5,0x01,0x00,0x00,0xd4,0x01,0x00,0x00,0x50,0x00,0x05,0x00, -0x64,0x00,0x00,0x00,0xd6,0x01,0x00,0x00,0xd3,0x01,0x00,0x00, -0xd5,0x01,0x00,0x00,0x8e,0x00,0x05,0x00,0x64,0x00,0x00,0x00, -0xd7,0x01,0x00,0x00,0xd6,0x01,0x00,0x00,0xcb,0x01,0x00,0x00, -0x50,0x00,0x05,0x00,0x64,0x00,0x00,0x00,0xd8,0x01,0x00,0x00, -0xcd,0x01,0x00,0x00,0xcd,0x01,0x00,0x00,0x81,0x00,0x05,0x00, -0x64,0x00,0x00,0x00,0xd9,0x01,0x00,0x00,0xd7,0x01,0x00,0x00, -0xd8,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xdf,0x01,0x00,0x00,0x83,0x00,0x00,0x00,0x18,0x03,0x00,0x00, 
-0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0xe1,0x01,0x00,0x00, -0xd9,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0xe2,0x01,0x00,0x00,0x7b,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xdf,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0xe2,0x01,0x00,0x00,0xe1,0x01,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xe9,0x01,0x00,0x00,0x83,0x00,0x00,0x00, -0x19,0x03,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0xea,0x01,0x00,0x00,0xd9,0x01,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0xeb,0x01,0x00,0x00, -0x7b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xe9,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0xeb,0x01,0x00,0x00,0xea,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xf4,0x01,0x00,0x00, -0x57,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xf6,0x01,0x00,0x00,0x5b,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x61,0x00,0x00,0x00,0xf7,0x01,0x00,0x00,0x54,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x37,0x00,0x00,0x00, -0x1a,0x03,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0xf8,0x01,0x00,0x00,0xf7,0x01,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0xf9,0x01,0x00,0x00,0xf8,0x01,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xfa,0x01,0x00,0x00, -0xf9,0x01,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xfb,0x01,0x00,0x00,0xfa,0x01,0x00,0x00,0x6a,0x00,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xfc,0x01,0x00,0x00, -0xfb,0x01,0x00,0x00,0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00, -0xfd,0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x6e,0x00,0x00,0x00, -0x70,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xfe,0x01,0x00,0x00, -0xfd,0x01,0x00,0x00,0x50,0x00,0x05,0x00,0x64,0x00,0x00,0x00, -0xff,0x01,0x00,0x00,0xfc,0x01,0x00,0x00,0xfe,0x01,0x00,0x00, -0x8e,0x00,0x05,0x00,0x64,0x00,0x00,0x00,0x00,0x02,0x00,0x00, -0xff,0x01,0x00,0x00,0xf4,0x01,0x00,0x00,0x50,0x00,0x05,0x00, -0x64,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0xf6,0x01,0x00,0x00, -0xf6,0x01,0x00,0x00,0x81,0x00,0x05,0x00,0x64,0x00,0x00,0x00, 
-0x02,0x02,0x00,0x00,0x00,0x02,0x00,0x00,0x01,0x02,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x08,0x02,0x00,0x00, -0x83,0x00,0x00,0x00,0x1a,0x03,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x0a,0x02,0x00,0x00,0x02,0x02,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0x0b,0x02,0x00,0x00,0x7b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x08,0x02,0x00,0x00,0x3e,0x00,0x03,0x00,0x0b,0x02,0x00,0x00, -0x0a,0x02,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x12,0x02,0x00,0x00,0x83,0x00,0x00,0x00,0x1b,0x03,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x13,0x02,0x00,0x00, -0x02,0x02,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0x14,0x02,0x00,0x00,0x7b,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x12,0x02,0x00,0x00,0x3e,0x00,0x03,0x00, -0x14,0x02,0x00,0x00,0x13,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x1d,0x02,0x00,0x00,0x57,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x1f,0x02,0x00,0x00, -0x5b,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x61,0x00,0x00,0x00, -0x20,0x02,0x00,0x00,0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x1c,0x03,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0x21,0x02,0x00,0x00, -0x20,0x02,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x22,0x02,0x00,0x00,0x21,0x02,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x23,0x02,0x00,0x00,0x22,0x02,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x24,0x02,0x00,0x00, -0x23,0x02,0x00,0x00,0x6a,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x25,0x02,0x00,0x00,0x24,0x02,0x00,0x00, -0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00,0x26,0x02,0x00,0x00, -0x21,0x02,0x00,0x00,0x6e,0x00,0x00,0x00,0x70,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x27,0x02,0x00,0x00,0x26,0x02,0x00,0x00, -0x50,0x00,0x05,0x00,0x64,0x00,0x00,0x00,0x28,0x02,0x00,0x00, -0x25,0x02,0x00,0x00,0x27,0x02,0x00,0x00,0x8e,0x00,0x05,0x00, -0x64,0x00,0x00,0x00,0x29,0x02,0x00,0x00,0x28,0x02,0x00,0x00, 
-0x1d,0x02,0x00,0x00,0x50,0x00,0x05,0x00,0x64,0x00,0x00,0x00, -0x2a,0x02,0x00,0x00,0x1f,0x02,0x00,0x00,0x1f,0x02,0x00,0x00, -0x81,0x00,0x05,0x00,0x64,0x00,0x00,0x00,0x2b,0x02,0x00,0x00, -0x29,0x02,0x00,0x00,0x2a,0x02,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x31,0x02,0x00,0x00,0x83,0x00,0x00,0x00, -0x1c,0x03,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0x33,0x02,0x00,0x00,0x2b,0x02,0x00,0x00,0x00,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0x34,0x02,0x00,0x00, -0x7b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x31,0x02,0x00,0x00, -0x3e,0x00,0x03,0x00,0x34,0x02,0x00,0x00,0x33,0x02,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3b,0x02,0x00,0x00, -0x83,0x00,0x00,0x00,0x1d,0x03,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x3c,0x02,0x00,0x00,0x2b,0x02,0x00,0x00, -0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0x3d,0x02,0x00,0x00,0x7b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x3b,0x02,0x00,0x00,0x3e,0x00,0x03,0x00,0x3d,0x02,0x00,0x00, -0x3c,0x02,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x46,0x02,0x00,0x00,0x57,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x48,0x02,0x00,0x00,0x5b,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x61,0x00,0x00,0x00,0x49,0x02,0x00,0x00, -0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x1e,0x03,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0x4a,0x02,0x00,0x00,0x49,0x02,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x4b,0x02,0x00,0x00, -0x4a,0x02,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x4c,0x02,0x00,0x00,0x4b,0x02,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x4d,0x02,0x00,0x00,0x4c,0x02,0x00,0x00, -0x6a,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x4e,0x02,0x00,0x00,0x4d,0x02,0x00,0x00,0xc2,0x00,0x05,0x00, -0x4d,0x00,0x00,0x00,0x4f,0x02,0x00,0x00,0x4a,0x02,0x00,0x00, -0x6e,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x50,0x02,0x00,0x00,0x4f,0x02,0x00,0x00,0x50,0x00,0x05,0x00, 
-0x64,0x00,0x00,0x00,0x51,0x02,0x00,0x00,0x4e,0x02,0x00,0x00, -0x50,0x02,0x00,0x00,0x8e,0x00,0x05,0x00,0x64,0x00,0x00,0x00, -0x52,0x02,0x00,0x00,0x51,0x02,0x00,0x00,0x46,0x02,0x00,0x00, -0x50,0x00,0x05,0x00,0x64,0x00,0x00,0x00,0x53,0x02,0x00,0x00, -0x48,0x02,0x00,0x00,0x48,0x02,0x00,0x00,0x81,0x00,0x05,0x00, -0x64,0x00,0x00,0x00,0x54,0x02,0x00,0x00,0x52,0x02,0x00,0x00, -0x53,0x02,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x5a,0x02,0x00,0x00,0x83,0x00,0x00,0x00,0x1e,0x03,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x5c,0x02,0x00,0x00, -0x54,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0x5d,0x02,0x00,0x00,0x7b,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x5a,0x02,0x00,0x00,0x3e,0x00,0x03,0x00, -0x5d,0x02,0x00,0x00,0x5c,0x02,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x64,0x02,0x00,0x00,0x83,0x00,0x00,0x00, -0x1f,0x03,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0x65,0x02,0x00,0x00,0x54,0x02,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0x66,0x02,0x00,0x00, -0x7b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x64,0x02,0x00,0x00, -0x3e,0x00,0x03,0x00,0x66,0x02,0x00,0x00,0x65,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x6f,0x02,0x00,0x00, -0x57,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x71,0x02,0x00,0x00,0x5b,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x61,0x00,0x00,0x00,0x72,0x02,0x00,0x00,0x54,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x37,0x00,0x00,0x00, -0x20,0x03,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0x73,0x02,0x00,0x00,0x72,0x02,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x74,0x02,0x00,0x00,0x73,0x02,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x75,0x02,0x00,0x00, -0x74,0x02,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x76,0x02,0x00,0x00,0x75,0x02,0x00,0x00,0x6a,0x00,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x77,0x02,0x00,0x00, -0x76,0x02,0x00,0x00,0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00, 
-0x78,0x02,0x00,0x00,0x73,0x02,0x00,0x00,0x6e,0x00,0x00,0x00, -0x70,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x79,0x02,0x00,0x00, -0x78,0x02,0x00,0x00,0x50,0x00,0x05,0x00,0x64,0x00,0x00,0x00, -0x7a,0x02,0x00,0x00,0x77,0x02,0x00,0x00,0x79,0x02,0x00,0x00, -0x8e,0x00,0x05,0x00,0x64,0x00,0x00,0x00,0x7b,0x02,0x00,0x00, -0x7a,0x02,0x00,0x00,0x6f,0x02,0x00,0x00,0x50,0x00,0x05,0x00, -0x64,0x00,0x00,0x00,0x7c,0x02,0x00,0x00,0x71,0x02,0x00,0x00, -0x71,0x02,0x00,0x00,0x81,0x00,0x05,0x00,0x64,0x00,0x00,0x00, -0x7d,0x02,0x00,0x00,0x7b,0x02,0x00,0x00,0x7c,0x02,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x83,0x02,0x00,0x00, -0x83,0x00,0x00,0x00,0x20,0x03,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x85,0x02,0x00,0x00,0x7d,0x02,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0x86,0x02,0x00,0x00,0x7b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x83,0x02,0x00,0x00,0x3e,0x00,0x03,0x00,0x86,0x02,0x00,0x00, -0x85,0x02,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x8d,0x02,0x00,0x00,0x83,0x00,0x00,0x00,0x21,0x03,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x8e,0x02,0x00,0x00, -0x7d,0x02,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0x8f,0x02,0x00,0x00,0x7b,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x8d,0x02,0x00,0x00,0x3e,0x00,0x03,0x00, -0x8f,0x02,0x00,0x00,0x8e,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x98,0x02,0x00,0x00,0x57,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x9a,0x02,0x00,0x00, -0x5b,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x61,0x00,0x00,0x00, -0x9b,0x02,0x00,0x00,0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x22,0x03,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0x9c,0x02,0x00,0x00, -0x9b,0x02,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x9d,0x02,0x00,0x00,0x9c,0x02,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x9e,0x02,0x00,0x00,0x9d,0x02,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x9f,0x02,0x00,0x00, 
-0x9e,0x02,0x00,0x00,0x6a,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xa0,0x02,0x00,0x00,0x9f,0x02,0x00,0x00, -0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00,0xa1,0x02,0x00,0x00, -0x9c,0x02,0x00,0x00,0x6e,0x00,0x00,0x00,0x70,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xa2,0x02,0x00,0x00,0xa1,0x02,0x00,0x00, -0x50,0x00,0x05,0x00,0x64,0x00,0x00,0x00,0xa3,0x02,0x00,0x00, -0xa0,0x02,0x00,0x00,0xa2,0x02,0x00,0x00,0x8e,0x00,0x05,0x00, -0x64,0x00,0x00,0x00,0xa4,0x02,0x00,0x00,0xa3,0x02,0x00,0x00, -0x98,0x02,0x00,0x00,0x50,0x00,0x05,0x00,0x64,0x00,0x00,0x00, -0xa5,0x02,0x00,0x00,0x9a,0x02,0x00,0x00,0x9a,0x02,0x00,0x00, -0x81,0x00,0x05,0x00,0x64,0x00,0x00,0x00,0xa6,0x02,0x00,0x00, -0xa4,0x02,0x00,0x00,0xa5,0x02,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xac,0x02,0x00,0x00,0x83,0x00,0x00,0x00, -0x22,0x03,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0xae,0x02,0x00,0x00,0xa6,0x02,0x00,0x00,0x00,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0xaf,0x02,0x00,0x00, -0x7b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xac,0x02,0x00,0x00, -0x3e,0x00,0x03,0x00,0xaf,0x02,0x00,0x00,0xae,0x02,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xb6,0x02,0x00,0x00, -0x83,0x00,0x00,0x00,0x23,0x03,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0xb7,0x02,0x00,0x00,0xa6,0x02,0x00,0x00, -0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0xb8,0x02,0x00,0x00,0x7b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xb6,0x02,0x00,0x00,0x3e,0x00,0x03,0x00,0xb8,0x02,0x00,0x00, -0xb7,0x02,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xc1,0x02,0x00,0x00,0x57,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xc3,0x02,0x00,0x00,0x5b,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x61,0x00,0x00,0x00,0xc4,0x02,0x00,0x00, -0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x24,0x03,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0xc5,0x02,0x00,0x00,0xc4,0x02,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0xc6,0x02,0x00,0x00, 
-0xc5,0x02,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xc7,0x02,0x00,0x00,0xc6,0x02,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xc8,0x02,0x00,0x00,0xc7,0x02,0x00,0x00, -0x6a,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xc9,0x02,0x00,0x00,0xc8,0x02,0x00,0x00,0xc2,0x00,0x05,0x00, -0x4d,0x00,0x00,0x00,0xca,0x02,0x00,0x00,0xc5,0x02,0x00,0x00, -0x6e,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xcb,0x02,0x00,0x00,0xca,0x02,0x00,0x00,0x50,0x00,0x05,0x00, -0x64,0x00,0x00,0x00,0xcc,0x02,0x00,0x00,0xc9,0x02,0x00,0x00, -0xcb,0x02,0x00,0x00,0x8e,0x00,0x05,0x00,0x64,0x00,0x00,0x00, -0xcd,0x02,0x00,0x00,0xcc,0x02,0x00,0x00,0xc1,0x02,0x00,0x00, -0x50,0x00,0x05,0x00,0x64,0x00,0x00,0x00,0xce,0x02,0x00,0x00, -0xc3,0x02,0x00,0x00,0xc3,0x02,0x00,0x00,0x81,0x00,0x05,0x00, -0x64,0x00,0x00,0x00,0xcf,0x02,0x00,0x00,0xcd,0x02,0x00,0x00, -0xce,0x02,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xd5,0x02,0x00,0x00,0x83,0x00,0x00,0x00,0x24,0x03,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0xd7,0x02,0x00,0x00, -0xcf,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0xd8,0x02,0x00,0x00,0x7b,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xd5,0x02,0x00,0x00,0x3e,0x00,0x03,0x00, -0xd8,0x02,0x00,0x00,0xd7,0x02,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xdf,0x02,0x00,0x00,0x83,0x00,0x00,0x00, -0x25,0x03,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0xe0,0x02,0x00,0x00,0xcf,0x02,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0xe1,0x02,0x00,0x00, -0x7b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xdf,0x02,0x00,0x00, -0x3e,0x00,0x03,0x00,0xe1,0x02,0x00,0x00,0xe0,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xea,0x02,0x00,0x00, -0x57,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xec,0x02,0x00,0x00,0x5b,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x61,0x00,0x00,0x00,0xed,0x02,0x00,0x00,0x54,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x37,0x00,0x00,0x00, 
-0x6a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0xee,0x02,0x00,0x00,0xed,0x02,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0xef,0x02,0x00,0x00,0xee,0x02,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xf0,0x02,0x00,0x00, -0xef,0x02,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xf1,0x02,0x00,0x00,0xf0,0x02,0x00,0x00,0x6a,0x00,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xf2,0x02,0x00,0x00, -0xf1,0x02,0x00,0x00,0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00, -0xf3,0x02,0x00,0x00,0xee,0x02,0x00,0x00,0x6e,0x00,0x00,0x00, -0x70,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xf4,0x02,0x00,0x00, -0xf3,0x02,0x00,0x00,0x50,0x00,0x05,0x00,0x64,0x00,0x00,0x00, -0xf5,0x02,0x00,0x00,0xf2,0x02,0x00,0x00,0xf4,0x02,0x00,0x00, -0x8e,0x00,0x05,0x00,0x64,0x00,0x00,0x00,0xf6,0x02,0x00,0x00, -0xf5,0x02,0x00,0x00,0xea,0x02,0x00,0x00,0x50,0x00,0x05,0x00, -0x64,0x00,0x00,0x00,0xf7,0x02,0x00,0x00,0xec,0x02,0x00,0x00, -0xec,0x02,0x00,0x00,0x81,0x00,0x05,0x00,0x64,0x00,0x00,0x00, -0xf8,0x02,0x00,0x00,0xf6,0x02,0x00,0x00,0xf7,0x02,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xfe,0x02,0x00,0x00, -0x83,0x00,0x00,0x00,0x6a,0x00,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x00,0x03,0x00,0x00,0xf8,0x02,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0x01,0x03,0x00,0x00,0x7b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xfe,0x02,0x00,0x00,0x3e,0x00,0x03,0x00,0x01,0x03,0x00,0x00, -0x00,0x03,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x08,0x03,0x00,0x00,0x83,0x00,0x00,0x00,0x26,0x03,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x09,0x03,0x00,0x00, -0xf8,0x02,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0x0a,0x03,0x00,0x00,0x7b,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x08,0x03,0x00,0x00,0x3e,0x00,0x03,0x00, -0x0a,0x03,0x00,0x00,0x09,0x03,0x00,0x00,0xf9,0x00,0x02,0x00, -0x9c,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x9c,0x00,0x00,0x00, -0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, -}; -const uint64_t 
dequant_q4_1_len = 8924; - -unsigned char dequant_q4_1_fp32_data[] = { -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, 0x59,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, 0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00,0x0b,0x00,0x06,0x00, @@ -5683,496 +3348,10 @@ unsigned char dequant_q4_1_fp32_data[] = { 0xa0,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0xa0,0x00,0x00,0x00, 0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, }; -const uint64_t dequant_q4_1_fp32_len = 9704; +const uint64_t dequant_q4_1_len = 9704; unsigned char dequant_q4_K_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0xa6,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, -0x11,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x51,0x11,0x00,0x00,0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00, -0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, -0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, -0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x0f,0x00,0x0a,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x25,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x4f,0x00,0x00,0x00, -0x08,0x01,0x00,0x00,0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x17,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x23,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x23,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x23,0x00,0x00,0x00, 
-0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x33,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x48,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x4b,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x4b,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x4b,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x4c,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x90,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x4d,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x4d,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x4f,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x4f,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x05,0x01,0x00,0x00, -0x06,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x06,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x06,0x01,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x06,0x01,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x08,0x01,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x08,0x01,0x00,0x00,0x21,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x31,0x01,0x00,0x00, -0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00, -0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x10,0x00,0x00,0x00, 
-0x00,0x01,0x00,0x00,0x14,0x00,0x02,0x00,0x11,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x15,0x00,0x00,0x00, -0x14,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x16,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x15,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x16,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x19,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x00,0x01,0x00,0x00,0x1e,0x00,0x06,0x00,0x23,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x24,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x24,0x00,0x00,0x00,0x25,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x26,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x16,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x39,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x3f,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x16,0x00,0x03,0x00, -0x42,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x17,0x00,0x04,0x00, -0x45,0x00,0x00,0x00,0x42,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x46,0x00,0x00,0x00,0x08,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0x47,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x1c,0x00,0x04,0x00, -0x48,0x00,0x00,0x00,0x46,0x00,0x00,0x00,0x47,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x49,0x00,0x00,0x00, -0x80,0x00,0x00,0x00,0x1c,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x46,0x00,0x00,0x00,0x49,0x00,0x00,0x00,0x1e,0x00,0x05,0x00, -0x4b,0x00,0x00,0x00,0x45,0x00,0x00,0x00,0x48,0x00,0x00,0x00, 
-0x4a,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x4c,0x00,0x00,0x00, -0x4b,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x4d,0x00,0x00,0x00, -0x4c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x4d,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x4e,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x51,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x42,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0x56,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x5c,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x60,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x65,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x73,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x46,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x78,0x00,0x00,0x00, -0x3f,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x7a,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x90,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x97,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xc6,0x00,0x00,0x00,0x05,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xda,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x05,0x01,0x00,0x00,0x42,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x06,0x01,0x00,0x00,0x05,0x01,0x00,0x00, -0x20,0x00,0x04,0x00,0x07,0x01,0x00,0x00,0x0c,0x00,0x00,0x00, -0x06,0x01,0x00,0x00,0x3b,0x00,0x04,0x00,0x07,0x01,0x00,0x00, -0x08,0x01,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0x30,0x01,0x00,0x00,0x20,0x00,0x00,0x00, -0x2c,0x00,0x06,0x00,0x15,0x00,0x00,0x00,0x31,0x01,0x00,0x00, -0x30,0x01,0x00,0x00,0x56,0x00,0x00,0x00,0x56,0x00,0x00,0x00, -0x2a,0x00,0x03,0x00,0x11,0x00,0x00,0x00,0x34,0x01,0x00,0x00, -0x29,0x00,0x03,0x00,0x11,0x00,0x00,0x00,0x37,0x01,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x9f,0x01,0x00,0x00, 
-0x21,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xa2,0x01,0x00,0x00,0x22,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xa5,0x01,0x00,0x00,0x23,0x00,0x00,0x00, -0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x32,0x01,0x00,0x00, -0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00,0x18,0x00,0x00,0x00, -0x33,0x01,0x00,0x00,0xf8,0x00,0x02,0x00,0x33,0x01,0x00,0x00, -0xf9,0x00,0x02,0x00,0x0a,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x0a,0x00,0x00,0x00,0xf5,0x00,0x07,0x00,0x06,0x00,0x00,0x00, -0x3a,0x01,0x00,0x00,0x09,0x00,0x00,0x00,0x33,0x01,0x00,0x00, -0x2f,0x01,0x00,0x00,0x0d,0x00,0x00,0x00,0xb1,0x00,0x05,0x00, -0x11,0x00,0x00,0x00,0x12,0x00,0x00,0x00,0x3a,0x01,0x00,0x00, -0x10,0x00,0x00,0x00,0xf6,0x00,0x04,0x00,0x0c,0x00,0x00,0x00, -0x0d,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0xfa,0x00,0x04,0x00, -0x12,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x0b,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x19,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0x1b,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x14,0x00,0x00,0x00,0x1d,0x00,0x00,0x00,0x1b,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x3a,0x01,0x00,0x00,0x80,0x00,0x05,0x00, -0x14,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x1d,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x26,0x00,0x00,0x00,0x27,0x00,0x00,0x00,0x25,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x28,0x00,0x00,0x00,0x27,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x26,0x00,0x00,0x00,0x2a,0x00,0x00,0x00,0x25,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x2b,0x00,0x00,0x00,0x2a,0x00,0x00,0x00,0x84,0x00,0x05,0x00, 
-0x06,0x00,0x00,0x00,0x2c,0x00,0x00,0x00,0x28,0x00,0x00,0x00, -0x2b,0x00,0x00,0x00,0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x2c,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0xaf,0x00,0x05,0x00,0x11,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, -0x30,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00, -0x2e,0x00,0x00,0x00,0x2f,0x00,0x00,0x00,0x30,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x2f,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0x0c,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x30,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x19,0x00,0x00,0x00,0x34,0x00,0x00,0x00, -0x33,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0x35,0x00,0x00,0x00,0x34,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x36,0x00,0x00,0x00, -0x35,0x00,0x00,0x00,0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x36,0x00,0x00,0x00,0x39,0x00,0x00,0x00, -0x8b,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3d,0x00,0x00,0x00, -0x36,0x00,0x00,0x00,0x39,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x3f,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x51,0x00,0x00,0x00, -0x52,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x42,0x00,0x00,0x00,0x53,0x00,0x00,0x00, -0x52,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x51,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x56,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x42,0x00,0x00,0x00,0x58,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x5b,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x5e,0x00,0x00,0x00, -0x5c,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x5f,0x00,0x00,0x00,0x5b,0x00,0x00,0x00, -0x5e,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, 
-0x62,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x3d,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x63,0x00,0x00,0x00, -0x5f,0x00,0x00,0x00,0x62,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x67,0x00,0x00,0x00,0x65,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x6a,0x00,0x00,0x00,0x67,0x00,0x00,0x00,0x62,0x00,0x00,0x00, -0xb1,0x00,0x05,0x00,0x11,0x00,0x00,0x00,0x6c,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, -0x6e,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00, -0x6c,0x00,0x00,0x00,0x6d,0x00,0x00,0x00,0x88,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x6d,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x73,0x00,0x00,0x00,0x74,0x00,0x00,0x00,0x4f,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x29,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00, -0x75,0x00,0x00,0x00,0x74,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0x76,0x00,0x00,0x00,0x75,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x77,0x00,0x00,0x00, -0x76,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x79,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x78,0x00,0x00,0x00, -0x72,0x00,0x04,0x00,0x7a,0x00,0x00,0x00,0x7b,0x00,0x00,0x00, -0x79,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x46,0x00,0x00,0x00, -0x7c,0x00,0x00,0x00,0x7b,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x60,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x73,0x00,0x00,0x00, -0x81,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x29,0x00,0x00,0x00,0x80,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00,0x82,0x00,0x00,0x00, -0x81,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0x83,0x00,0x00,0x00,0x82,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x84,0x00,0x00,0x00,0x83,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x85,0x00,0x00,0x00, -0x84,0x00,0x00,0x00,0x78,0x00,0x00,0x00,0x72,0x00,0x04,0x00, 
-0x7a,0x00,0x00,0x00,0x86,0x00,0x00,0x00,0x85,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x46,0x00,0x00,0x00,0x87,0x00,0x00,0x00, -0x86,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x6e,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x88,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x8b,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x60,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x73,0x00,0x00,0x00, -0x8c,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x29,0x00,0x00,0x00,0x8b,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00,0x8d,0x00,0x00,0x00, -0x8c,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0x8e,0x00,0x00,0x00,0x8d,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x8f,0x00,0x00,0x00,0x8e,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x91,0x00,0x00,0x00, -0x8f,0x00,0x00,0x00,0x90,0x00,0x00,0x00,0x82,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x94,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x60,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x73,0x00,0x00,0x00, -0x95,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x29,0x00,0x00,0x00,0x94,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00,0x96,0x00,0x00,0x00, -0x95,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x46,0x00,0x00,0x00, -0x98,0x00,0x00,0x00,0x96,0x00,0x00,0x00,0x97,0x00,0x00,0x00, -0xc4,0x00,0x05,0x00,0x46,0x00,0x00,0x00,0x99,0x00,0x00,0x00, -0x98,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0x9a,0x00,0x00,0x00,0x99,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x9b,0x00,0x00,0x00, -0x9a,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x9c,0x00,0x00,0x00,0x91,0x00,0x00,0x00,0x9b,0x00,0x00,0x00, -0x72,0x00,0x04,0x00,0x7a,0x00,0x00,0x00,0x9d,0x00,0x00,0x00, -0x9c,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x46,0x00,0x00,0x00, -0x9e,0x00,0x00,0x00,0x9d,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x46,0x00,0x00,0x00,0xa3,0x00,0x00,0x00,0x8c,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x46,0x00,0x00,0x00,0xa4,0x00,0x00,0x00, 
-0xa3,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x73,0x00,0x00,0x00,0xa7,0x00,0x00,0x00,0x4f,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x29,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00, -0xa8,0x00,0x00,0x00,0xa7,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x46,0x00,0x00,0x00,0xa9,0x00,0x00,0x00,0xa8,0x00,0x00,0x00, -0x97,0x00,0x00,0x00,0xc4,0x00,0x05,0x00,0x46,0x00,0x00,0x00, -0xaa,0x00,0x00,0x00,0xa9,0x00,0x00,0x00,0x60,0x00,0x00,0x00, -0xc5,0x00,0x05,0x00,0x46,0x00,0x00,0x00,0xab,0x00,0x00,0x00, -0xa4,0x00,0x00,0x00,0xaa,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0x6e,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x6e,0x00,0x00,0x00, -0xf5,0x00,0x07,0x00,0x46,0x00,0x00,0x00,0x3c,0x01,0x00,0x00, -0x87,0x00,0x00,0x00,0x6d,0x00,0x00,0x00,0xab,0x00,0x00,0x00, -0x88,0x00,0x00,0x00,0xf5,0x00,0x07,0x00,0x46,0x00,0x00,0x00, -0x3b,0x01,0x00,0x00,0x7c,0x00,0x00,0x00,0x6d,0x00,0x00,0x00, -0x9e,0x00,0x00,0x00,0x88,0x00,0x00,0x00,0x70,0x00,0x04,0x00, -0x42,0x00,0x00,0x00,0xaf,0x00,0x00,0x00,0x3b,0x01,0x00,0x00, -0x85,0x00,0x05,0x00,0x42,0x00,0x00,0x00,0xb0,0x00,0x00,0x00, -0x53,0x00,0x00,0x00,0xaf,0x00,0x00,0x00,0x70,0x00,0x04,0x00, -0x42,0x00,0x00,0x00,0xb4,0x00,0x00,0x00,0x3c,0x01,0x00,0x00, -0x85,0x00,0x05,0x00,0x42,0x00,0x00,0x00,0xb5,0x00,0x00,0x00, -0x58,0x00,0x00,0x00,0xb4,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, -0xb9,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00, -0x6c,0x00,0x00,0x00,0xb8,0x00,0x00,0x00,0xcf,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0xb8,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xbc,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x73,0x00,0x00,0x00, -0xbd,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x29,0x00,0x00,0x00,0xbc,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00,0xbe,0x00,0x00,0x00, -0xbd,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0xbf,0x00,0x00,0x00,0xbe,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, 
-0x06,0x00,0x00,0x00,0xc0,0x00,0x00,0x00,0xbf,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xc1,0x00,0x00,0x00, -0xc0,0x00,0x00,0x00,0x78,0x00,0x00,0x00,0x72,0x00,0x04,0x00, -0x7a,0x00,0x00,0x00,0xc2,0x00,0x00,0x00,0xc1,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x46,0x00,0x00,0x00,0xc3,0x00,0x00,0x00, -0xc2,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xc7,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0xc6,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x73,0x00,0x00,0x00,0xc8,0x00,0x00,0x00, -0x4f,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0xc7,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x46,0x00,0x00,0x00,0xc9,0x00,0x00,0x00,0xc8,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0xca,0x00,0x00,0x00, -0xc9,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xcb,0x00,0x00,0x00,0xca,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xcc,0x00,0x00,0x00,0xcb,0x00,0x00,0x00, -0x78,0x00,0x00,0x00,0x72,0x00,0x04,0x00,0x7a,0x00,0x00,0x00, -0xcd,0x00,0x00,0x00,0xcc,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x46,0x00,0x00,0x00,0xce,0x00,0x00,0x00,0xcd,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0xb9,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0xcf,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xd2,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0xc6,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x73,0x00,0x00,0x00,0xd3,0x00,0x00,0x00, -0x4f,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0xd2,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x46,0x00,0x00,0x00,0xd4,0x00,0x00,0x00,0xd3,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0xd5,0x00,0x00,0x00, -0xd4,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xd6,0x00,0x00,0x00,0xd5,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xd7,0x00,0x00,0x00,0xd6,0x00,0x00,0x00, -0x90,0x00,0x00,0x00,0x82,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xdb,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0xda,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x73,0x00,0x00,0x00,0xdc,0x00,0x00,0x00, 
-0x4f,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0xdb,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x46,0x00,0x00,0x00,0xdd,0x00,0x00,0x00,0xdc,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x46,0x00,0x00,0x00,0xde,0x00,0x00,0x00, -0xdd,0x00,0x00,0x00,0x97,0x00,0x00,0x00,0xc4,0x00,0x05,0x00, -0x46,0x00,0x00,0x00,0xdf,0x00,0x00,0x00,0xde,0x00,0x00,0x00, -0x60,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0xe0,0x00,0x00,0x00,0xdf,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xe1,0x00,0x00,0x00,0xe0,0x00,0x00,0x00, -0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xe2,0x00,0x00,0x00, -0xd7,0x00,0x00,0x00,0xe1,0x00,0x00,0x00,0x72,0x00,0x04,0x00, -0x7a,0x00,0x00,0x00,0xe3,0x00,0x00,0x00,0xe2,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x46,0x00,0x00,0x00,0xe4,0x00,0x00,0x00, -0xe3,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00, -0xe9,0x00,0x00,0x00,0xd3,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x46,0x00,0x00,0x00,0xea,0x00,0x00,0x00,0xe9,0x00,0x00,0x00, -0x60,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xed,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x29,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x73,0x00,0x00,0x00,0xee,0x00,0x00,0x00, -0x4f,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0xed,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x46,0x00,0x00,0x00,0xef,0x00,0x00,0x00,0xee,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x46,0x00,0x00,0x00,0xf0,0x00,0x00,0x00, -0xef,0x00,0x00,0x00,0x97,0x00,0x00,0x00,0xc4,0x00,0x05,0x00, -0x46,0x00,0x00,0x00,0xf1,0x00,0x00,0x00,0xf0,0x00,0x00,0x00, -0x60,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x46,0x00,0x00,0x00, -0xf2,0x00,0x00,0x00,0xea,0x00,0x00,0x00,0xf1,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0xb9,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0xb9,0x00,0x00,0x00,0xf5,0x00,0x07,0x00,0x46,0x00,0x00,0x00, -0x3e,0x01,0x00,0x00,0xce,0x00,0x00,0x00,0xb8,0x00,0x00,0x00, -0xf2,0x00,0x00,0x00,0xcf,0x00,0x00,0x00,0xf5,0x00,0x07,0x00, -0x46,0x00,0x00,0x00,0x3d,0x01,0x00,0x00,0xc3,0x00,0x00,0x00, 
-0xb8,0x00,0x00,0x00,0xe4,0x00,0x00,0x00,0xcf,0x00,0x00,0x00, -0x70,0x00,0x04,0x00,0x42,0x00,0x00,0x00,0xf6,0x00,0x00,0x00, -0x3d,0x01,0x00,0x00,0x85,0x00,0x05,0x00,0x42,0x00,0x00,0x00, -0xf7,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0xf6,0x00,0x00,0x00, -0x70,0x00,0x04,0x00,0x42,0x00,0x00,0x00,0xfb,0x00,0x00,0x00, -0x3e,0x01,0x00,0x00,0x85,0x00,0x05,0x00,0x42,0x00,0x00,0x00, -0xfc,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0xfb,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x73,0x00,0x00,0x00,0x11,0x01,0x00,0x00, -0x4f,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x3f,0x00,0x00,0x00,0x6a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x46,0x00,0x00,0x00,0x12,0x01,0x00,0x00,0x11,0x01,0x00,0x00, -0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x13,0x01,0x00,0x00, -0x12,0x01,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x14,0x01,0x00,0x00,0x13,0x01,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x15,0x01,0x00,0x00,0x14,0x01,0x00,0x00, -0x90,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x42,0x00,0x00,0x00, -0x16,0x01,0x00,0x00,0x15,0x01,0x00,0x00,0x7f,0x00,0x04,0x00, -0x42,0x00,0x00,0x00,0x9c,0x01,0x00,0x00,0xb5,0x00,0x00,0x00, -0x0c,0x00,0x08,0x00,0x42,0x00,0x00,0x00,0x19,0x01,0x00,0x00, -0x01,0x00,0x00,0x00,0x32,0x00,0x00,0x00,0xb0,0x00,0x00,0x00, -0x16,0x01,0x00,0x00,0x9c,0x01,0x00,0x00,0x41,0x00,0x06,0x00, -0x51,0x00,0x00,0x00,0x1a,0x01,0x00,0x00,0x08,0x01,0x00,0x00, -0x09,0x00,0x00,0x00,0x63,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0x1a,0x01,0x00,0x00,0x19,0x01,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x1e,0x01,0x00,0x00,0x63,0x00,0x00,0x00, -0x65,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00, -0x25,0x01,0x00,0x00,0x11,0x01,0x00,0x00,0xc2,0x00,0x05,0x00, -0x46,0x00,0x00,0x00,0x26,0x01,0x00,0x00,0x25,0x01,0x00,0x00, -0x60,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x42,0x00,0x00,0x00, -0x27,0x01,0x00,0x00,0x26,0x01,0x00,0x00,0x7f,0x00,0x04,0x00, -0x42,0x00,0x00,0x00,0x9d,0x01,0x00,0x00,0xfc,0x00,0x00,0x00, -0x0c,0x00,0x08,0x00,0x42,0x00,0x00,0x00,0x2a,0x01,0x00,0x00, 
-0x01,0x00,0x00,0x00,0x32,0x00,0x00,0x00,0xf7,0x00,0x00,0x00, -0x27,0x01,0x00,0x00,0x9d,0x01,0x00,0x00,0x41,0x00,0x06,0x00, -0x51,0x00,0x00,0x00,0x2b,0x01,0x00,0x00,0x08,0x01,0x00,0x00, -0x09,0x00,0x00,0x00,0x1e,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0x2b,0x01,0x00,0x00,0x2a,0x01,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x4d,0x01,0x00,0x00,0x63,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x4e,0x01,0x00,0x00,0x6a,0x00,0x00,0x00,0x29,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x73,0x00,0x00,0x00,0x4f,0x01,0x00,0x00, -0x4f,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x3f,0x00,0x00,0x00,0x4e,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x46,0x00,0x00,0x00,0x50,0x01,0x00,0x00,0x4f,0x01,0x00,0x00, -0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x51,0x01,0x00,0x00, -0x50,0x01,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x52,0x01,0x00,0x00,0x51,0x01,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x53,0x01,0x00,0x00,0x52,0x01,0x00,0x00, -0x90,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x42,0x00,0x00,0x00, -0x54,0x01,0x00,0x00,0x53,0x01,0x00,0x00,0x0c,0x00,0x08,0x00, -0x42,0x00,0x00,0x00,0x56,0x01,0x00,0x00,0x01,0x00,0x00,0x00, -0x32,0x00,0x00,0x00,0xb0,0x00,0x00,0x00,0x54,0x01,0x00,0x00, -0x9c,0x01,0x00,0x00,0x41,0x00,0x06,0x00,0x51,0x00,0x00,0x00, -0x57,0x01,0x00,0x00,0x08,0x01,0x00,0x00,0x09,0x00,0x00,0x00, -0x4d,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0x57,0x01,0x00,0x00, -0x56,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x59,0x01,0x00,0x00,0x63,0x00,0x00,0x00,0x9f,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00,0x5c,0x01,0x00,0x00, -0x4f,0x01,0x00,0x00,0xc2,0x00,0x05,0x00,0x46,0x00,0x00,0x00, -0x5d,0x01,0x00,0x00,0x5c,0x01,0x00,0x00,0x60,0x00,0x00,0x00, -0x70,0x00,0x04,0x00,0x42,0x00,0x00,0x00,0x5e,0x01,0x00,0x00, -0x5d,0x01,0x00,0x00,0x0c,0x00,0x08,0x00,0x42,0x00,0x00,0x00, -0x60,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x32,0x00,0x00,0x00, -0xf7,0x00,0x00,0x00,0x5e,0x01,0x00,0x00,0x9d,0x01,0x00,0x00, 
-0x41,0x00,0x06,0x00,0x51,0x00,0x00,0x00,0x61,0x01,0x00,0x00, -0x08,0x01,0x00,0x00,0x09,0x00,0x00,0x00,0x59,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0x61,0x01,0x00,0x00,0x60,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x69,0x01,0x00,0x00, -0x63,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x6a,0x01,0x00,0x00,0x6a,0x00,0x00,0x00, -0x3f,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x73,0x00,0x00,0x00, -0x6b,0x01,0x00,0x00,0x4f,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x6a,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00,0x6c,0x01,0x00,0x00, -0x6b,0x01,0x00,0x00,0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0x6d,0x01,0x00,0x00,0x6c,0x01,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x6e,0x01,0x00,0x00,0x6d,0x01,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x6f,0x01,0x00,0x00, -0x6e,0x01,0x00,0x00,0x90,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x42,0x00,0x00,0x00,0x70,0x01,0x00,0x00,0x6f,0x01,0x00,0x00, -0x0c,0x00,0x08,0x00,0x42,0x00,0x00,0x00,0x72,0x01,0x00,0x00, -0x01,0x00,0x00,0x00,0x32,0x00,0x00,0x00,0xb0,0x00,0x00,0x00, -0x70,0x01,0x00,0x00,0x9c,0x01,0x00,0x00,0x41,0x00,0x06,0x00, -0x51,0x00,0x00,0x00,0x73,0x01,0x00,0x00,0x08,0x01,0x00,0x00, -0x09,0x00,0x00,0x00,0x69,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0x73,0x01,0x00,0x00,0x72,0x01,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x75,0x01,0x00,0x00,0x63,0x00,0x00,0x00, -0xa2,0x01,0x00,0x00,0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00, -0x78,0x01,0x00,0x00,0x6b,0x01,0x00,0x00,0xc2,0x00,0x05,0x00, -0x46,0x00,0x00,0x00,0x79,0x01,0x00,0x00,0x78,0x01,0x00,0x00, -0x60,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x42,0x00,0x00,0x00, -0x7a,0x01,0x00,0x00,0x79,0x01,0x00,0x00,0x0c,0x00,0x08,0x00, -0x42,0x00,0x00,0x00,0x7c,0x01,0x00,0x00,0x01,0x00,0x00,0x00, -0x32,0x00,0x00,0x00,0xf7,0x00,0x00,0x00,0x7a,0x01,0x00,0x00, -0x9d,0x01,0x00,0x00,0x41,0x00,0x06,0x00,0x51,0x00,0x00,0x00, -0x7d,0x01,0x00,0x00,0x08,0x01,0x00,0x00,0x09,0x00,0x00,0x00, 
-0x75,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0x7d,0x01,0x00,0x00, -0x7c,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x85,0x01,0x00,0x00,0x63,0x00,0x00,0x00,0xda,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x86,0x01,0x00,0x00, -0x6a,0x00,0x00,0x00,0xda,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x73,0x00,0x00,0x00,0x87,0x01,0x00,0x00,0x4f,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x3f,0x00,0x00,0x00, -0x86,0x01,0x00,0x00,0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00, -0x88,0x01,0x00,0x00,0x87,0x01,0x00,0x00,0x71,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0x89,0x01,0x00,0x00,0x88,0x01,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x8a,0x01,0x00,0x00, -0x89,0x01,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x8b,0x01,0x00,0x00,0x8a,0x01,0x00,0x00,0x90,0x00,0x00,0x00, -0x6f,0x00,0x04,0x00,0x42,0x00,0x00,0x00,0x8c,0x01,0x00,0x00, -0x8b,0x01,0x00,0x00,0x0c,0x00,0x08,0x00,0x42,0x00,0x00,0x00, -0x8e,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x32,0x00,0x00,0x00, -0xb0,0x00,0x00,0x00,0x8c,0x01,0x00,0x00,0x9c,0x01,0x00,0x00, -0x41,0x00,0x06,0x00,0x51,0x00,0x00,0x00,0x8f,0x01,0x00,0x00, -0x08,0x01,0x00,0x00,0x09,0x00,0x00,0x00,0x85,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0x8f,0x01,0x00,0x00,0x8e,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x91,0x01,0x00,0x00, -0x63,0x00,0x00,0x00,0xa5,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x46,0x00,0x00,0x00,0x94,0x01,0x00,0x00,0x87,0x01,0x00,0x00, -0xc2,0x00,0x05,0x00,0x46,0x00,0x00,0x00,0x95,0x01,0x00,0x00, -0x94,0x01,0x00,0x00,0x60,0x00,0x00,0x00,0x70,0x00,0x04,0x00, -0x42,0x00,0x00,0x00,0x96,0x01,0x00,0x00,0x95,0x01,0x00,0x00, -0x0c,0x00,0x08,0x00,0x42,0x00,0x00,0x00,0x98,0x01,0x00,0x00, -0x01,0x00,0x00,0x00,0x32,0x00,0x00,0x00,0xf7,0x00,0x00,0x00, -0x96,0x01,0x00,0x00,0x9d,0x01,0x00,0x00,0x41,0x00,0x06,0x00, -0x51,0x00,0x00,0x00,0x99,0x01,0x00,0x00,0x08,0x01,0x00,0x00, -0x09,0x00,0x00,0x00,0x91,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0x99,0x01,0x00,0x00,0x98,0x01,0x00,0x00,0xf9,0x00,0x02,0x00, 
-0x0d,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x0d,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x2f,0x01,0x00,0x00, -0x3a,0x01,0x00,0x00,0x29,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0x0a,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x0c,0x00,0x00,0x00, -0xf5,0x00,0x07,0x00,0x11,0x00,0x00,0x00,0x43,0x01,0x00,0x00, -0x34,0x01,0x00,0x00,0x0a,0x00,0x00,0x00,0x37,0x01,0x00,0x00, -0x2f,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x38,0x01,0x00,0x00, -0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x43,0x01,0x00,0x00, -0x32,0x01,0x00,0x00,0x38,0x01,0x00,0x00,0xf8,0x00,0x02,0x00, -0x38,0x01,0x00,0x00,0xf9,0x00,0x02,0x00,0x32,0x01,0x00,0x00, -0xf8,0x00,0x02,0x00,0x32,0x01,0x00,0x00,0xfd,0x00,0x01,0x00, -0x38,0x00,0x01,0x00, -}; -const uint64_t dequant_q4_K_len = 5776; - -unsigned char dequant_q4_K_fp32_data[] = { -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, 0xb1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x27,0x00,0x00,0x00, 0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00,0x11,0x00,0x02,0x00, @@ -6669,1134 +3848,10 @@ unsigned char dequant_q4_K_fp32_data[] = { 0x37,0x01,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, }; -const uint64_t dequant_q4_K_fp32_len = 5940; +const uint64_t dequant_q4_K_len = 5940; unsigned char dequant_q5_0_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0x7a,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, -0x11,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x51,0x11,0x00,0x00,0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00, -0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, -0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, -0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x0f,0x00,0x09,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x57,0x00,0x00,0x00,0xa1,0x00,0x00,0x00, 
-0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x00,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x0c,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x14,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x14,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x14,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x14,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x14,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x4f,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x52,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x53,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x53,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x53,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x54,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x48,0x00,0x04,0x00,0x55,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x55,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x55,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x57,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x57,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x9e,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x48,0x00,0x04,0x00,0x9f,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x9f,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x9f,0x00,0x00,0x00,0x02,0x00,0x00,0x00, 
-0x47,0x00,0x04,0x00,0xa1,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0xa1,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0xc0,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00, -0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x0a,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x0b,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x0a,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x0b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x0d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x0e,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x1e,0x00,0x06,0x00,0x14,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x15,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x14,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x15,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x18,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x1b,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x14,0x00,0x02,0x00, -0x24,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x48,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x16,0x00,0x03,0x00,0x4a,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x4e,0x00,0x00,0x00,0x02,0x00,0x00,0x00, 
-0x1c,0x00,0x04,0x00,0x4f,0x00,0x00,0x00,0x4d,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x50,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x1c,0x00,0x04,0x00,0x52,0x00,0x00,0x00,0x50,0x00,0x00,0x00, -0x51,0x00,0x00,0x00,0x1e,0x00,0x05,0x00,0x53,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x52,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x54,0x00,0x00,0x00,0x53,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x55,0x00,0x00,0x00,0x54,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x56,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x55,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x56,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x59,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x5f,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x4d,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x6f,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x7f,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x50,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x82,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x88,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x92,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x99,0x00,0x00,0x00,0x00,0x4c,0x00,0x00,0x1d,0x00,0x03,0x00, -0x9e,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0x9f,0x00,0x00,0x00,0x9e,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0xa0,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x9f,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0xa0,0x00,0x00,0x00,0xa1,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xa3,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0xbf,0x00,0x00,0x00,0x00,0x01,0x00,0x00, -0x2c,0x00,0x06,0x00,0x0a,0x00,0x00,0x00,0xc0,0x00,0x00,0x00, 
-0xbf,0x00,0x00,0x00,0x92,0x00,0x00,0x00,0x92,0x00,0x00,0x00, -0x2c,0x00,0x05,0x00,0x82,0x00,0x00,0x00,0xcd,0x00,0x00,0x00, -0x99,0x00,0x00,0x00,0x99,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x62,0x04,0x00,0x00,0x0d,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x63,0x04,0x00,0x00, -0x11,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x64,0x04,0x00,0x00,0x0e,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x65,0x04,0x00,0x00,0x12,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x66,0x04,0x00,0x00, -0x13,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x67,0x04,0x00,0x00,0x14,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x68,0x04,0x00,0x00,0x05,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x69,0x04,0x00,0x00, -0x15,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x6a,0x04,0x00,0x00,0x06,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x6b,0x04,0x00,0x00,0x16,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x6c,0x04,0x00,0x00, -0x07,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x6d,0x04,0x00,0x00,0x17,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x6e,0x04,0x00,0x00,0x08,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x6f,0x04,0x00,0x00, -0x18,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x70,0x04,0x00,0x00,0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x71,0x04,0x00,0x00,0x19,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x72,0x04,0x00,0x00, -0x0a,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x73,0x04,0x00,0x00,0x1a,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x74,0x04,0x00,0x00,0x0b,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x75,0x04,0x00,0x00, -0x1b,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x76,0x04,0x00,0x00,0x1c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x77,0x04,0x00,0x00,0x1d,0x00,0x00,0x00, 
-0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x78,0x04,0x00,0x00, -0x1e,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x79,0x04,0x00,0x00,0x1f,0x00,0x00,0x00,0x36,0x00,0x05,0x00, -0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0xc1,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfb,0x00,0x03,0x00,0x0d,0x00,0x00,0x00,0xc2,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0xc2,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x0e,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x0d,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x18,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x1a,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x8b,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x1d,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x26,0x00,0x00,0x00, -0x1d,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0xaf,0x00,0x05,0x00, -0x24,0x00,0x00,0x00,0x29,0x00,0x00,0x00,0x26,0x00,0x00,0x00, -0x1a,0x00,0x00,0x00,0xa8,0x00,0x04,0x00,0x24,0x00,0x00,0x00, -0x2a,0x00,0x00,0x00,0x29,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, -0x2c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00, -0x2a,0x00,0x00,0x00,0x2b,0x00,0x00,0x00,0x2c,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x2b,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x18,0x00,0x00,0x00,0x2f,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x30,0x00,0x00,0x00,0x2f,0x00,0x00,0x00,0xaf,0x00,0x05,0x00, -0x24,0x00,0x00,0x00,0x31,0x00,0x00,0x00,0x23,0x00,0x00,0x00, 
-0x30,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x2c,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x2c,0x00,0x00,0x00,0xf5,0x00,0x07,0x00, -0x24,0x00,0x00,0x00,0x32,0x00,0x00,0x00,0x29,0x00,0x00,0x00, -0xc2,0x00,0x00,0x00,0x31,0x00,0x00,0x00,0x2b,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0x34,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfa,0x00,0x04,0x00,0x32,0x00,0x00,0x00,0x33,0x00,0x00,0x00, -0x34,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x33,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0xc1,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x34,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x18,0x00,0x00,0x00, -0x38,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x37,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x39,0x00,0x00,0x00, -0x38,0x00,0x00,0x00,0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x39,0x00,0x00,0x00,0x1b,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3e,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x3e,0x00,0x00,0x00, -0x1d,0x00,0x00,0x00,0x41,0x00,0x07,0x00,0x59,0x00,0x00,0x00, -0x5a,0x00,0x00,0x00,0x57,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x5b,0x00,0x00,0x00,0x5a,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x5f,0x00,0x00,0x00,0x60,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0x61,0x00,0x00,0x00,0x60,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x62,0x00,0x00,0x00, -0x61,0x00,0x00,0x00,0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x63,0x00,0x00,0x00,0x62,0x00,0x00,0x00,0x48,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x5f,0x00,0x00,0x00,0x65,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0x65,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x67,0x00,0x00,0x00, 
-0x66,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x68,0x00,0x00,0x00,0x63,0x00,0x00,0x00,0x67,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x6e,0x00,0x00,0x00, -0x68,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xc4,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x70,0x00,0x00,0x00,0x6e,0x00,0x00,0x00, -0x6f,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x71,0x00,0x00,0x00,0x70,0x00,0x00,0x00,0x51,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x72,0x00,0x00,0x00, -0x71,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x77,0x00,0x00,0x00,0x68,0x00,0x00,0x00,0x75,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x78,0x00,0x00,0x00, -0x77,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x79,0x00,0x00,0x00,0x78,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x7f,0x00,0x00,0x00,0x80,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x50,0x00,0x00,0x00,0x81,0x00,0x00,0x00,0x80,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x86,0x00,0x00,0x00, -0x81,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x87,0x00,0x00,0x00,0x86,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x89,0x00,0x00,0x00,0x87,0x00,0x00,0x00, -0x88,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x8c,0x00,0x00,0x00,0x89,0x00,0x00,0x00,0x72,0x00,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x8d,0x00,0x00,0x00, -0x8c,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x50,0x00,0x00,0x00, -0x8f,0x00,0x00,0x00,0x81,0x00,0x00,0x00,0x6f,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x90,0x00,0x00,0x00, -0x8f,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x91,0x00,0x00,0x00,0x90,0x00,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x95,0x00,0x00,0x00,0x91,0x00,0x00,0x00, -0x79,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x96,0x00,0x00,0x00,0x95,0x00,0x00,0x00,0x50,0x00,0x05,0x00, 
-0x82,0x00,0x00,0x00,0x97,0x00,0x00,0x00,0x8d,0x00,0x00,0x00, -0x96,0x00,0x00,0x00,0x83,0x00,0x05,0x00,0x82,0x00,0x00,0x00, -0x9b,0x00,0x00,0x00,0x97,0x00,0x00,0x00,0xcd,0x00,0x00,0x00, -0x8e,0x00,0x05,0x00,0x82,0x00,0x00,0x00,0x9d,0x00,0x00,0x00, -0x9b,0x00,0x00,0x00,0x5b,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x18,0x00,0x00,0x00,0xa4,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0xa3,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xa5,0x00,0x00,0x00,0xa4,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xa6,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0xa5,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xa9,0x00,0x00,0x00,0xa6,0x00,0x00,0x00,0x26,0x00,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0xae,0x00,0x00,0x00, -0x9d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x59,0x00,0x00,0x00,0xaf,0x00,0x00,0x00,0xa1,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xa9,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0xaf,0x00,0x00,0x00,0xae,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xb9,0x00,0x00,0x00,0xa9,0x00,0x00,0x00, -0x48,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0xbb,0x00,0x00,0x00,0x9d,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x59,0x00,0x00,0x00,0xbc,0x00,0x00,0x00, -0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xb9,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0xbc,0x00,0x00,0x00,0xbb,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xd4,0x00,0x00,0x00, -0x5a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0xd6,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0xd7,0x00,0x00,0x00,0xd6,0x00,0x00,0x00, -0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0xd8,0x00,0x00,0x00, -0xd7,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0xda,0x00,0x00,0x00,0x65,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0xdb,0x00,0x00,0x00, -0xda,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0xdc,0x00,0x00,0x00,0xd8,0x00,0x00,0x00,0xdb,0x00,0x00,0x00, 
-0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0xdd,0x00,0x00,0x00, -0xdc,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0xc4,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0xde,0x00,0x00,0x00,0xdd,0x00,0x00,0x00, -0x6f,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0xdf,0x00,0x00,0x00,0xde,0x00,0x00,0x00,0x51,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xe0,0x00,0x00,0x00, -0xdf,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0xe2,0x00,0x00,0x00,0xdc,0x00,0x00,0x00,0x62,0x04,0x00,0x00, -0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0xe3,0x00,0x00,0x00, -0xe2,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xe4,0x00,0x00,0x00,0xe3,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x7f,0x00,0x00,0x00,0xe6,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x50,0x00,0x00,0x00,0xe7,0x00,0x00,0x00,0xe6,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0xe8,0x00,0x00,0x00, -0xe7,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xe9,0x00,0x00,0x00,0xe8,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xea,0x00,0x00,0x00,0xe9,0x00,0x00,0x00, -0x88,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xec,0x00,0x00,0x00,0xea,0x00,0x00,0x00,0xe0,0x00,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xed,0x00,0x00,0x00, -0xec,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x50,0x00,0x00,0x00, -0xee,0x00,0x00,0x00,0xe7,0x00,0x00,0x00,0x6f,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0xef,0x00,0x00,0x00, -0xee,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xf0,0x00,0x00,0x00,0xef,0x00,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xf2,0x00,0x00,0x00,0xf0,0x00,0x00,0x00, -0xe4,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xf3,0x00,0x00,0x00,0xf2,0x00,0x00,0x00,0x50,0x00,0x05,0x00, -0x82,0x00,0x00,0x00,0xf4,0x00,0x00,0x00,0xed,0x00,0x00,0x00, -0xf3,0x00,0x00,0x00,0x83,0x00,0x05,0x00,0x82,0x00,0x00,0x00, 
-0xf5,0x00,0x00,0x00,0xf4,0x00,0x00,0x00,0xcd,0x00,0x00,0x00, -0x8e,0x00,0x05,0x00,0x82,0x00,0x00,0x00,0xf6,0x00,0x00,0x00, -0xf5,0x00,0x00,0x00,0xd4,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xfc,0x00,0x00,0x00,0xa9,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0xfe,0x00,0x00,0x00,0xf6,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x59,0x00,0x00,0x00,0xff,0x00,0x00,0x00, -0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xfc,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0xff,0x00,0x00,0x00,0xfe,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x06,0x01,0x00,0x00, -0xa9,0x00,0x00,0x00,0x63,0x04,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x07,0x01,0x00,0x00,0xf6,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x59,0x00,0x00,0x00, -0x08,0x01,0x00,0x00,0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x06,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0x08,0x01,0x00,0x00, -0x07,0x01,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x11,0x01,0x00,0x00,0x5a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0x13,0x01,0x00,0x00,0x60,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x14,0x01,0x00,0x00, -0x13,0x01,0x00,0x00,0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x15,0x01,0x00,0x00,0x14,0x01,0x00,0x00,0x48,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0x17,0x01,0x00,0x00, -0x65,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x18,0x01,0x00,0x00,0x17,0x01,0x00,0x00,0xc5,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x19,0x01,0x00,0x00,0x15,0x01,0x00,0x00, -0x18,0x01,0x00,0x00,0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x1a,0x01,0x00,0x00,0x19,0x01,0x00,0x00,0x37,0x00,0x00,0x00, -0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x1b,0x01,0x00,0x00, -0x1a,0x01,0x00,0x00,0x6f,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x1c,0x01,0x00,0x00,0x1b,0x01,0x00,0x00, -0x51,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x1d,0x01,0x00,0x00,0x1c,0x01,0x00,0x00,0xc2,0x00,0x05,0x00, 
-0x09,0x00,0x00,0x00,0x1f,0x01,0x00,0x00,0x19,0x01,0x00,0x00, -0x64,0x04,0x00,0x00,0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x20,0x01,0x00,0x00,0x1f,0x01,0x00,0x00,0x51,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x21,0x01,0x00,0x00, -0x20,0x01,0x00,0x00,0x41,0x00,0x08,0x00,0x7f,0x00,0x00,0x00, -0x23,0x01,0x00,0x00,0x57,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x37,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x50,0x00,0x00,0x00,0x24,0x01,0x00,0x00, -0x23,0x01,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x25,0x01,0x00,0x00,0x24,0x01,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x26,0x01,0x00,0x00,0x25,0x01,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x27,0x01,0x00,0x00, -0x26,0x01,0x00,0x00,0x88,0x00,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x29,0x01,0x00,0x00,0x27,0x01,0x00,0x00, -0x1d,0x01,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x2a,0x01,0x00,0x00,0x29,0x01,0x00,0x00,0xc2,0x00,0x05,0x00, -0x50,0x00,0x00,0x00,0x2b,0x01,0x00,0x00,0x24,0x01,0x00,0x00, -0x6f,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x2c,0x01,0x00,0x00,0x2b,0x01,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x2d,0x01,0x00,0x00,0x2c,0x01,0x00,0x00, -0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x2f,0x01,0x00,0x00, -0x2d,0x01,0x00,0x00,0x21,0x01,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x30,0x01,0x00,0x00,0x2f,0x01,0x00,0x00, -0x50,0x00,0x05,0x00,0x82,0x00,0x00,0x00,0x31,0x01,0x00,0x00, -0x2a,0x01,0x00,0x00,0x30,0x01,0x00,0x00,0x83,0x00,0x05,0x00, -0x82,0x00,0x00,0x00,0x32,0x01,0x00,0x00,0x31,0x01,0x00,0x00, -0xcd,0x00,0x00,0x00,0x8e,0x00,0x05,0x00,0x82,0x00,0x00,0x00, -0x33,0x01,0x00,0x00,0x32,0x01,0x00,0x00,0x11,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x39,0x01,0x00,0x00, -0xa9,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x3b,0x01,0x00,0x00,0x33,0x01,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x59,0x00,0x00,0x00, 
-0x3c,0x01,0x00,0x00,0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x39,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0x3c,0x01,0x00,0x00, -0x3b,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x43,0x01,0x00,0x00,0xa9,0x00,0x00,0x00,0x65,0x04,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x44,0x01,0x00,0x00, -0x33,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x59,0x00,0x00,0x00,0x45,0x01,0x00,0x00,0xa1,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x43,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0x45,0x01,0x00,0x00,0x44,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x4e,0x01,0x00,0x00,0x5a,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0x50,0x01,0x00,0x00, -0x60,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x51,0x01,0x00,0x00,0x50,0x01,0x00,0x00,0xc4,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x52,0x01,0x00,0x00,0x51,0x01,0x00,0x00, -0x48,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0x54,0x01,0x00,0x00,0x65,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x55,0x01,0x00,0x00,0x54,0x01,0x00,0x00, -0xc5,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x56,0x01,0x00,0x00, -0x52,0x01,0x00,0x00,0x55,0x01,0x00,0x00,0xc2,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x57,0x01,0x00,0x00,0x56,0x01,0x00,0x00, -0xa3,0x00,0x00,0x00,0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x58,0x01,0x00,0x00,0x57,0x01,0x00,0x00,0x6f,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x59,0x01,0x00,0x00, -0x58,0x01,0x00,0x00,0x51,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x5a,0x01,0x00,0x00,0x59,0x01,0x00,0x00, -0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x5c,0x01,0x00,0x00, -0x56,0x01,0x00,0x00,0x88,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x5d,0x01,0x00,0x00,0x5c,0x01,0x00,0x00, -0x51,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x5e,0x01,0x00,0x00,0x5d,0x01,0x00,0x00,0x41,0x00,0x08,0x00, -0x7f,0x00,0x00,0x00,0x60,0x01,0x00,0x00,0x57,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x37,0x00,0x00,0x00, 
-0xa3,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x50,0x00,0x00,0x00, -0x61,0x01,0x00,0x00,0x60,0x01,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x62,0x01,0x00,0x00,0x61,0x01,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x63,0x01,0x00,0x00, -0x62,0x01,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x64,0x01,0x00,0x00,0x63,0x01,0x00,0x00,0x88,0x00,0x00,0x00, -0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x66,0x01,0x00,0x00, -0x64,0x01,0x00,0x00,0x5a,0x01,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x67,0x01,0x00,0x00,0x66,0x01,0x00,0x00, -0xc2,0x00,0x05,0x00,0x50,0x00,0x00,0x00,0x68,0x01,0x00,0x00, -0x61,0x01,0x00,0x00,0x6f,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x69,0x01,0x00,0x00,0x68,0x01,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x6a,0x01,0x00,0x00, -0x69,0x01,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x6c,0x01,0x00,0x00,0x6a,0x01,0x00,0x00,0x5e,0x01,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x6d,0x01,0x00,0x00, -0x6c,0x01,0x00,0x00,0x50,0x00,0x05,0x00,0x82,0x00,0x00,0x00, -0x6e,0x01,0x00,0x00,0x67,0x01,0x00,0x00,0x6d,0x01,0x00,0x00, -0x83,0x00,0x05,0x00,0x82,0x00,0x00,0x00,0x6f,0x01,0x00,0x00, -0x6e,0x01,0x00,0x00,0xcd,0x00,0x00,0x00,0x8e,0x00,0x05,0x00, -0x82,0x00,0x00,0x00,0x70,0x01,0x00,0x00,0x6f,0x01,0x00,0x00, -0x4e,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x76,0x01,0x00,0x00,0xa9,0x00,0x00,0x00,0xa3,0x00,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x78,0x01,0x00,0x00, -0x70,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x59,0x00,0x00,0x00,0x79,0x01,0x00,0x00,0xa1,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x76,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0x79,0x01,0x00,0x00,0x78,0x01,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x80,0x01,0x00,0x00,0xa9,0x00,0x00,0x00, -0x66,0x04,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0x81,0x01,0x00,0x00,0x70,0x01,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x59,0x00,0x00,0x00,0x82,0x01,0x00,0x00, 
-0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x80,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0x82,0x01,0x00,0x00,0x81,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x8b,0x01,0x00,0x00, -0x5a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0x8d,0x01,0x00,0x00,0x60,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x8e,0x01,0x00,0x00,0x8d,0x01,0x00,0x00, -0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x8f,0x01,0x00,0x00, -0x8e,0x01,0x00,0x00,0x48,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0x91,0x01,0x00,0x00,0x65,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x92,0x01,0x00,0x00, -0x91,0x01,0x00,0x00,0xc5,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x93,0x01,0x00,0x00,0x8f,0x01,0x00,0x00,0x92,0x01,0x00,0x00, -0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x94,0x01,0x00,0x00, -0x93,0x01,0x00,0x00,0x6f,0x00,0x00,0x00,0xc4,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x95,0x01,0x00,0x00,0x94,0x01,0x00,0x00, -0x6f,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x96,0x01,0x00,0x00,0x95,0x01,0x00,0x00,0x51,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x97,0x01,0x00,0x00, -0x96,0x01,0x00,0x00,0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x99,0x01,0x00,0x00,0x93,0x01,0x00,0x00,0x48,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x9a,0x01,0x00,0x00, -0x99,0x01,0x00,0x00,0x51,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x9b,0x01,0x00,0x00,0x9a,0x01,0x00,0x00, -0x41,0x00,0x08,0x00,0x7f,0x00,0x00,0x00,0x9d,0x01,0x00,0x00, -0x57,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x6f,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x50,0x00,0x00,0x00,0x9e,0x01,0x00,0x00,0x9d,0x01,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x9f,0x01,0x00,0x00, -0x9e,0x01,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xa0,0x01,0x00,0x00,0x9f,0x01,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xa1,0x01,0x00,0x00,0xa0,0x01,0x00,0x00, -0x88,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, 
-0xa3,0x01,0x00,0x00,0xa1,0x01,0x00,0x00,0x97,0x01,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xa4,0x01,0x00,0x00, -0xa3,0x01,0x00,0x00,0xc2,0x00,0x05,0x00,0x50,0x00,0x00,0x00, -0xa5,0x01,0x00,0x00,0x9e,0x01,0x00,0x00,0x6f,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0xa6,0x01,0x00,0x00, -0xa5,0x01,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xa7,0x01,0x00,0x00,0xa6,0x01,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xa9,0x01,0x00,0x00,0xa7,0x01,0x00,0x00, -0x9b,0x01,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xaa,0x01,0x00,0x00,0xa9,0x01,0x00,0x00,0x50,0x00,0x05,0x00, -0x82,0x00,0x00,0x00,0xab,0x01,0x00,0x00,0xa4,0x01,0x00,0x00, -0xaa,0x01,0x00,0x00,0x83,0x00,0x05,0x00,0x82,0x00,0x00,0x00, -0xac,0x01,0x00,0x00,0xab,0x01,0x00,0x00,0xcd,0x00,0x00,0x00, -0x8e,0x00,0x05,0x00,0x82,0x00,0x00,0x00,0xad,0x01,0x00,0x00, -0xac,0x01,0x00,0x00,0x8b,0x01,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xb3,0x01,0x00,0x00,0xa9,0x00,0x00,0x00, -0x6f,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0xb5,0x01,0x00,0x00,0xad,0x01,0x00,0x00,0x00,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x59,0x00,0x00,0x00,0xb6,0x01,0x00,0x00, -0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xb3,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0xb6,0x01,0x00,0x00,0xb5,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xbd,0x01,0x00,0x00, -0xa9,0x00,0x00,0x00,0x67,0x04,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0xbe,0x01,0x00,0x00,0xad,0x01,0x00,0x00, -0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x59,0x00,0x00,0x00, -0xbf,0x01,0x00,0x00,0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xbd,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0xbf,0x01,0x00,0x00, -0xbe,0x01,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xc8,0x01,0x00,0x00,0x5a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0xca,0x01,0x00,0x00,0x60,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0xcb,0x01,0x00,0x00, -0xca,0x01,0x00,0x00,0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00, 
-0xcc,0x01,0x00,0x00,0xcb,0x01,0x00,0x00,0x48,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0xce,0x01,0x00,0x00, -0x65,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0xcf,0x01,0x00,0x00,0xce,0x01,0x00,0x00,0xc5,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0xd0,0x01,0x00,0x00,0xcc,0x01,0x00,0x00, -0xcf,0x01,0x00,0x00,0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0xd1,0x01,0x00,0x00,0xd0,0x01,0x00,0x00,0x68,0x04,0x00,0x00, -0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0xd2,0x01,0x00,0x00, -0xd1,0x01,0x00,0x00,0x6f,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0xd3,0x01,0x00,0x00,0xd2,0x01,0x00,0x00, -0x51,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xd4,0x01,0x00,0x00,0xd3,0x01,0x00,0x00,0xc2,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0xd6,0x01,0x00,0x00,0xd0,0x01,0x00,0x00, -0x63,0x04,0x00,0x00,0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0xd7,0x01,0x00,0x00,0xd6,0x01,0x00,0x00,0x51,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xd8,0x01,0x00,0x00, -0xd7,0x01,0x00,0x00,0x41,0x00,0x08,0x00,0x7f,0x00,0x00,0x00, -0xda,0x01,0x00,0x00,0x57,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x68,0x04,0x00,0x00, -0x3d,0x00,0x04,0x00,0x50,0x00,0x00,0x00,0xdb,0x01,0x00,0x00, -0xda,0x01,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0xdc,0x01,0x00,0x00,0xdb,0x01,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xdd,0x01,0x00,0x00,0xdc,0x01,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xde,0x01,0x00,0x00, -0xdd,0x01,0x00,0x00,0x88,0x00,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xe0,0x01,0x00,0x00,0xde,0x01,0x00,0x00, -0xd4,0x01,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xe1,0x01,0x00,0x00,0xe0,0x01,0x00,0x00,0xc2,0x00,0x05,0x00, -0x50,0x00,0x00,0x00,0xe2,0x01,0x00,0x00,0xdb,0x01,0x00,0x00, -0x6f,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0xe3,0x01,0x00,0x00,0xe2,0x01,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xe4,0x01,0x00,0x00,0xe3,0x01,0x00,0x00, 
-0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xe6,0x01,0x00,0x00, -0xe4,0x01,0x00,0x00,0xd8,0x01,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xe7,0x01,0x00,0x00,0xe6,0x01,0x00,0x00, -0x50,0x00,0x05,0x00,0x82,0x00,0x00,0x00,0xe8,0x01,0x00,0x00, -0xe1,0x01,0x00,0x00,0xe7,0x01,0x00,0x00,0x83,0x00,0x05,0x00, -0x82,0x00,0x00,0x00,0xe9,0x01,0x00,0x00,0xe8,0x01,0x00,0x00, -0xcd,0x00,0x00,0x00,0x8e,0x00,0x05,0x00,0x82,0x00,0x00,0x00, -0xea,0x01,0x00,0x00,0xe9,0x01,0x00,0x00,0xc8,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xf0,0x01,0x00,0x00, -0xa9,0x00,0x00,0x00,0x68,0x04,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0xf2,0x01,0x00,0x00,0xea,0x01,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x59,0x00,0x00,0x00, -0xf3,0x01,0x00,0x00,0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xf0,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0xf3,0x01,0x00,0x00, -0xf2,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xfa,0x01,0x00,0x00,0xa9,0x00,0x00,0x00,0x69,0x04,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0xfb,0x01,0x00,0x00, -0xea,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x59,0x00,0x00,0x00,0xfc,0x01,0x00,0x00,0xa1,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xfa,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0xfc,0x01,0x00,0x00,0xfb,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x05,0x02,0x00,0x00,0x5a,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0x07,0x02,0x00,0x00, -0x60,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x08,0x02,0x00,0x00,0x07,0x02,0x00,0x00,0xc4,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x09,0x02,0x00,0x00,0x08,0x02,0x00,0x00, -0x48,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0x0b,0x02,0x00,0x00,0x65,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x0c,0x02,0x00,0x00,0x0b,0x02,0x00,0x00, -0xc5,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x0d,0x02,0x00,0x00, -0x09,0x02,0x00,0x00,0x0c,0x02,0x00,0x00,0xc2,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x0e,0x02,0x00,0x00,0x0d,0x02,0x00,0x00, 
-0x6a,0x04,0x00,0x00,0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x0f,0x02,0x00,0x00,0x0e,0x02,0x00,0x00,0x6f,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x10,0x02,0x00,0x00, -0x0f,0x02,0x00,0x00,0x51,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x11,0x02,0x00,0x00,0x10,0x02,0x00,0x00, -0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x13,0x02,0x00,0x00, -0x0d,0x02,0x00,0x00,0x65,0x04,0x00,0x00,0xc7,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x14,0x02,0x00,0x00,0x13,0x02,0x00,0x00, -0x51,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x15,0x02,0x00,0x00,0x14,0x02,0x00,0x00,0x41,0x00,0x08,0x00, -0x7f,0x00,0x00,0x00,0x17,0x02,0x00,0x00,0x57,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x37,0x00,0x00,0x00, -0x6a,0x04,0x00,0x00,0x3d,0x00,0x04,0x00,0x50,0x00,0x00,0x00, -0x18,0x02,0x00,0x00,0x17,0x02,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x19,0x02,0x00,0x00,0x18,0x02,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x1a,0x02,0x00,0x00, -0x19,0x02,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x1b,0x02,0x00,0x00,0x1a,0x02,0x00,0x00,0x88,0x00,0x00,0x00, -0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x1d,0x02,0x00,0x00, -0x1b,0x02,0x00,0x00,0x11,0x02,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x1e,0x02,0x00,0x00,0x1d,0x02,0x00,0x00, -0xc2,0x00,0x05,0x00,0x50,0x00,0x00,0x00,0x1f,0x02,0x00,0x00, -0x18,0x02,0x00,0x00,0x6f,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x20,0x02,0x00,0x00,0x1f,0x02,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x21,0x02,0x00,0x00, -0x20,0x02,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x23,0x02,0x00,0x00,0x21,0x02,0x00,0x00,0x15,0x02,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x24,0x02,0x00,0x00, -0x23,0x02,0x00,0x00,0x50,0x00,0x05,0x00,0x82,0x00,0x00,0x00, -0x25,0x02,0x00,0x00,0x1e,0x02,0x00,0x00,0x24,0x02,0x00,0x00, -0x83,0x00,0x05,0x00,0x82,0x00,0x00,0x00,0x26,0x02,0x00,0x00, -0x25,0x02,0x00,0x00,0xcd,0x00,0x00,0x00,0x8e,0x00,0x05,0x00, 
-0x82,0x00,0x00,0x00,0x27,0x02,0x00,0x00,0x26,0x02,0x00,0x00, -0x05,0x02,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x2d,0x02,0x00,0x00,0xa9,0x00,0x00,0x00,0x6a,0x04,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x2f,0x02,0x00,0x00, -0x27,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x59,0x00,0x00,0x00,0x30,0x02,0x00,0x00,0xa1,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x2d,0x02,0x00,0x00,0x3e,0x00,0x03,0x00, -0x30,0x02,0x00,0x00,0x2f,0x02,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x37,0x02,0x00,0x00,0xa9,0x00,0x00,0x00, -0x6b,0x04,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0x38,0x02,0x00,0x00,0x27,0x02,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x59,0x00,0x00,0x00,0x39,0x02,0x00,0x00, -0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x37,0x02,0x00,0x00, -0x3e,0x00,0x03,0x00,0x39,0x02,0x00,0x00,0x38,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x42,0x02,0x00,0x00, -0x5a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0x44,0x02,0x00,0x00,0x60,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x45,0x02,0x00,0x00,0x44,0x02,0x00,0x00, -0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x46,0x02,0x00,0x00, -0x45,0x02,0x00,0x00,0x48,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0x48,0x02,0x00,0x00,0x65,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x49,0x02,0x00,0x00, -0x48,0x02,0x00,0x00,0xc5,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x4a,0x02,0x00,0x00,0x46,0x02,0x00,0x00,0x49,0x02,0x00,0x00, -0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x4b,0x02,0x00,0x00, -0x4a,0x02,0x00,0x00,0x6c,0x04,0x00,0x00,0xc4,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x4c,0x02,0x00,0x00,0x4b,0x02,0x00,0x00, -0x6f,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x4d,0x02,0x00,0x00,0x4c,0x02,0x00,0x00,0x51,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x4e,0x02,0x00,0x00, -0x4d,0x02,0x00,0x00,0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x50,0x02,0x00,0x00,0x4a,0x02,0x00,0x00,0x66,0x04,0x00,0x00, 
-0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x51,0x02,0x00,0x00, -0x50,0x02,0x00,0x00,0x51,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x52,0x02,0x00,0x00,0x51,0x02,0x00,0x00, -0x41,0x00,0x08,0x00,0x7f,0x00,0x00,0x00,0x54,0x02,0x00,0x00, -0x57,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x6c,0x04,0x00,0x00,0x3d,0x00,0x04,0x00, -0x50,0x00,0x00,0x00,0x55,0x02,0x00,0x00,0x54,0x02,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x56,0x02,0x00,0x00, -0x55,0x02,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x57,0x02,0x00,0x00,0x56,0x02,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x58,0x02,0x00,0x00,0x57,0x02,0x00,0x00, -0x88,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x5a,0x02,0x00,0x00,0x58,0x02,0x00,0x00,0x4e,0x02,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x5b,0x02,0x00,0x00, -0x5a,0x02,0x00,0x00,0xc2,0x00,0x05,0x00,0x50,0x00,0x00,0x00, -0x5c,0x02,0x00,0x00,0x55,0x02,0x00,0x00,0x6f,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x5d,0x02,0x00,0x00, -0x5c,0x02,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x5e,0x02,0x00,0x00,0x5d,0x02,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x60,0x02,0x00,0x00,0x5e,0x02,0x00,0x00, -0x52,0x02,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x61,0x02,0x00,0x00,0x60,0x02,0x00,0x00,0x50,0x00,0x05,0x00, -0x82,0x00,0x00,0x00,0x62,0x02,0x00,0x00,0x5b,0x02,0x00,0x00, -0x61,0x02,0x00,0x00,0x83,0x00,0x05,0x00,0x82,0x00,0x00,0x00, -0x63,0x02,0x00,0x00,0x62,0x02,0x00,0x00,0xcd,0x00,0x00,0x00, -0x8e,0x00,0x05,0x00,0x82,0x00,0x00,0x00,0x64,0x02,0x00,0x00, -0x63,0x02,0x00,0x00,0x42,0x02,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x6a,0x02,0x00,0x00,0xa9,0x00,0x00,0x00, -0x6c,0x04,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0x6c,0x02,0x00,0x00,0x64,0x02,0x00,0x00,0x00,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x59,0x00,0x00,0x00,0x6d,0x02,0x00,0x00, -0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x6a,0x02,0x00,0x00, 
-0x3e,0x00,0x03,0x00,0x6d,0x02,0x00,0x00,0x6c,0x02,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x74,0x02,0x00,0x00, -0xa9,0x00,0x00,0x00,0x6d,0x04,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x75,0x02,0x00,0x00,0x64,0x02,0x00,0x00, -0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x59,0x00,0x00,0x00, -0x76,0x02,0x00,0x00,0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x74,0x02,0x00,0x00,0x3e,0x00,0x03,0x00,0x76,0x02,0x00,0x00, -0x75,0x02,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x7f,0x02,0x00,0x00,0x5a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0x81,0x02,0x00,0x00,0x60,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x82,0x02,0x00,0x00, -0x81,0x02,0x00,0x00,0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x83,0x02,0x00,0x00,0x82,0x02,0x00,0x00,0x48,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0x85,0x02,0x00,0x00, -0x65,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x86,0x02,0x00,0x00,0x85,0x02,0x00,0x00,0xc5,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x87,0x02,0x00,0x00,0x83,0x02,0x00,0x00, -0x86,0x02,0x00,0x00,0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x88,0x02,0x00,0x00,0x87,0x02,0x00,0x00,0x6e,0x04,0x00,0x00, -0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x89,0x02,0x00,0x00, -0x88,0x02,0x00,0x00,0x6f,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x8a,0x02,0x00,0x00,0x89,0x02,0x00,0x00, -0x51,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x8b,0x02,0x00,0x00,0x8a,0x02,0x00,0x00,0xc2,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x8d,0x02,0x00,0x00,0x87,0x02,0x00,0x00, -0x67,0x04,0x00,0x00,0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x8e,0x02,0x00,0x00,0x8d,0x02,0x00,0x00,0x51,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x8f,0x02,0x00,0x00, -0x8e,0x02,0x00,0x00,0x41,0x00,0x08,0x00,0x7f,0x00,0x00,0x00, -0x91,0x02,0x00,0x00,0x57,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x6e,0x04,0x00,0x00, -0x3d,0x00,0x04,0x00,0x50,0x00,0x00,0x00,0x92,0x02,0x00,0x00, 
-0x91,0x02,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x93,0x02,0x00,0x00,0x92,0x02,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x94,0x02,0x00,0x00,0x93,0x02,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x95,0x02,0x00,0x00, -0x94,0x02,0x00,0x00,0x88,0x00,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x97,0x02,0x00,0x00,0x95,0x02,0x00,0x00, -0x8b,0x02,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x98,0x02,0x00,0x00,0x97,0x02,0x00,0x00,0xc2,0x00,0x05,0x00, -0x50,0x00,0x00,0x00,0x99,0x02,0x00,0x00,0x92,0x02,0x00,0x00, -0x6f,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x9a,0x02,0x00,0x00,0x99,0x02,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x9b,0x02,0x00,0x00,0x9a,0x02,0x00,0x00, -0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x9d,0x02,0x00,0x00, -0x9b,0x02,0x00,0x00,0x8f,0x02,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x9e,0x02,0x00,0x00,0x9d,0x02,0x00,0x00, -0x50,0x00,0x05,0x00,0x82,0x00,0x00,0x00,0x9f,0x02,0x00,0x00, -0x98,0x02,0x00,0x00,0x9e,0x02,0x00,0x00,0x83,0x00,0x05,0x00, -0x82,0x00,0x00,0x00,0xa0,0x02,0x00,0x00,0x9f,0x02,0x00,0x00, -0xcd,0x00,0x00,0x00,0x8e,0x00,0x05,0x00,0x82,0x00,0x00,0x00, -0xa1,0x02,0x00,0x00,0xa0,0x02,0x00,0x00,0x7f,0x02,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xa7,0x02,0x00,0x00, -0xa9,0x00,0x00,0x00,0x6e,0x04,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0xa9,0x02,0x00,0x00,0xa1,0x02,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x59,0x00,0x00,0x00, -0xaa,0x02,0x00,0x00,0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xa7,0x02,0x00,0x00,0x3e,0x00,0x03,0x00,0xaa,0x02,0x00,0x00, -0xa9,0x02,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xb1,0x02,0x00,0x00,0xa9,0x00,0x00,0x00,0x6f,0x04,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0xb2,0x02,0x00,0x00, -0xa1,0x02,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x59,0x00,0x00,0x00,0xb3,0x02,0x00,0x00,0xa1,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xb1,0x02,0x00,0x00,0x3e,0x00,0x03,0x00, 
-0xb3,0x02,0x00,0x00,0xb2,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xbc,0x02,0x00,0x00,0x5a,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0xbe,0x02,0x00,0x00, -0x60,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0xbf,0x02,0x00,0x00,0xbe,0x02,0x00,0x00,0xc4,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0xc0,0x02,0x00,0x00,0xbf,0x02,0x00,0x00, -0x48,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0xc2,0x02,0x00,0x00,0x65,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0xc3,0x02,0x00,0x00,0xc2,0x02,0x00,0x00, -0xc5,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0xc4,0x02,0x00,0x00, -0xc0,0x02,0x00,0x00,0xc3,0x02,0x00,0x00,0xc2,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0xc5,0x02,0x00,0x00,0xc4,0x02,0x00,0x00, -0x70,0x04,0x00,0x00,0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0xc6,0x02,0x00,0x00,0xc5,0x02,0x00,0x00,0x6f,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0xc7,0x02,0x00,0x00, -0xc6,0x02,0x00,0x00,0x51,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xc8,0x02,0x00,0x00,0xc7,0x02,0x00,0x00, -0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0xca,0x02,0x00,0x00, -0xc4,0x02,0x00,0x00,0x69,0x04,0x00,0x00,0xc7,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0xcb,0x02,0x00,0x00,0xca,0x02,0x00,0x00, -0x51,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xcc,0x02,0x00,0x00,0xcb,0x02,0x00,0x00,0x41,0x00,0x08,0x00, -0x7f,0x00,0x00,0x00,0xce,0x02,0x00,0x00,0x57,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x37,0x00,0x00,0x00, -0x70,0x04,0x00,0x00,0x3d,0x00,0x04,0x00,0x50,0x00,0x00,0x00, -0xcf,0x02,0x00,0x00,0xce,0x02,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0xd0,0x02,0x00,0x00,0xcf,0x02,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xd1,0x02,0x00,0x00, -0xd0,0x02,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xd2,0x02,0x00,0x00,0xd1,0x02,0x00,0x00,0x88,0x00,0x00,0x00, -0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xd4,0x02,0x00,0x00, -0xd2,0x02,0x00,0x00,0xc8,0x02,0x00,0x00,0x6f,0x00,0x04,0x00, 
-0x4a,0x00,0x00,0x00,0xd5,0x02,0x00,0x00,0xd4,0x02,0x00,0x00, -0xc2,0x00,0x05,0x00,0x50,0x00,0x00,0x00,0xd6,0x02,0x00,0x00, -0xcf,0x02,0x00,0x00,0x6f,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0xd7,0x02,0x00,0x00,0xd6,0x02,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xd8,0x02,0x00,0x00, -0xd7,0x02,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xda,0x02,0x00,0x00,0xd8,0x02,0x00,0x00,0xcc,0x02,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xdb,0x02,0x00,0x00, -0xda,0x02,0x00,0x00,0x50,0x00,0x05,0x00,0x82,0x00,0x00,0x00, -0xdc,0x02,0x00,0x00,0xd5,0x02,0x00,0x00,0xdb,0x02,0x00,0x00, -0x83,0x00,0x05,0x00,0x82,0x00,0x00,0x00,0xdd,0x02,0x00,0x00, -0xdc,0x02,0x00,0x00,0xcd,0x00,0x00,0x00,0x8e,0x00,0x05,0x00, -0x82,0x00,0x00,0x00,0xde,0x02,0x00,0x00,0xdd,0x02,0x00,0x00, -0xbc,0x02,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xe4,0x02,0x00,0x00,0xa9,0x00,0x00,0x00,0x70,0x04,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0xe6,0x02,0x00,0x00, -0xde,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x59,0x00,0x00,0x00,0xe7,0x02,0x00,0x00,0xa1,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xe4,0x02,0x00,0x00,0x3e,0x00,0x03,0x00, -0xe7,0x02,0x00,0x00,0xe6,0x02,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xee,0x02,0x00,0x00,0xa9,0x00,0x00,0x00, -0x71,0x04,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0xef,0x02,0x00,0x00,0xde,0x02,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x59,0x00,0x00,0x00,0xf0,0x02,0x00,0x00, -0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xee,0x02,0x00,0x00, -0x3e,0x00,0x03,0x00,0xf0,0x02,0x00,0x00,0xef,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xf9,0x02,0x00,0x00, -0x5a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0xfb,0x02,0x00,0x00,0x60,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0xfc,0x02,0x00,0x00,0xfb,0x02,0x00,0x00, -0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0xfd,0x02,0x00,0x00, -0xfc,0x02,0x00,0x00,0x48,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, 
-0x4d,0x00,0x00,0x00,0xff,0x02,0x00,0x00,0x65,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x00,0x03,0x00,0x00, -0xff,0x02,0x00,0x00,0xc5,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x01,0x03,0x00,0x00,0xfd,0x02,0x00,0x00,0x00,0x03,0x00,0x00, -0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x02,0x03,0x00,0x00, -0x01,0x03,0x00,0x00,0x72,0x04,0x00,0x00,0xc4,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x03,0x03,0x00,0x00,0x02,0x03,0x00,0x00, -0x6f,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x04,0x03,0x00,0x00,0x03,0x03,0x00,0x00,0x51,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x05,0x03,0x00,0x00, -0x04,0x03,0x00,0x00,0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x07,0x03,0x00,0x00,0x01,0x03,0x00,0x00,0x6b,0x04,0x00,0x00, -0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x08,0x03,0x00,0x00, -0x07,0x03,0x00,0x00,0x51,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x09,0x03,0x00,0x00,0x08,0x03,0x00,0x00, -0x41,0x00,0x08,0x00,0x7f,0x00,0x00,0x00,0x0b,0x03,0x00,0x00, -0x57,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x72,0x04,0x00,0x00,0x3d,0x00,0x04,0x00, -0x50,0x00,0x00,0x00,0x0c,0x03,0x00,0x00,0x0b,0x03,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x0d,0x03,0x00,0x00, -0x0c,0x03,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x0e,0x03,0x00,0x00,0x0d,0x03,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x0f,0x03,0x00,0x00,0x0e,0x03,0x00,0x00, -0x88,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x11,0x03,0x00,0x00,0x0f,0x03,0x00,0x00,0x05,0x03,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x12,0x03,0x00,0x00, -0x11,0x03,0x00,0x00,0xc2,0x00,0x05,0x00,0x50,0x00,0x00,0x00, -0x13,0x03,0x00,0x00,0x0c,0x03,0x00,0x00,0x6f,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x14,0x03,0x00,0x00, -0x13,0x03,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x15,0x03,0x00,0x00,0x14,0x03,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x17,0x03,0x00,0x00,0x15,0x03,0x00,0x00, 
-0x09,0x03,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x18,0x03,0x00,0x00,0x17,0x03,0x00,0x00,0x50,0x00,0x05,0x00, -0x82,0x00,0x00,0x00,0x19,0x03,0x00,0x00,0x12,0x03,0x00,0x00, -0x18,0x03,0x00,0x00,0x83,0x00,0x05,0x00,0x82,0x00,0x00,0x00, -0x1a,0x03,0x00,0x00,0x19,0x03,0x00,0x00,0xcd,0x00,0x00,0x00, -0x8e,0x00,0x05,0x00,0x82,0x00,0x00,0x00,0x1b,0x03,0x00,0x00, -0x1a,0x03,0x00,0x00,0xf9,0x02,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x21,0x03,0x00,0x00,0xa9,0x00,0x00,0x00, -0x72,0x04,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0x23,0x03,0x00,0x00,0x1b,0x03,0x00,0x00,0x00,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x59,0x00,0x00,0x00,0x24,0x03,0x00,0x00, -0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x21,0x03,0x00,0x00, -0x3e,0x00,0x03,0x00,0x24,0x03,0x00,0x00,0x23,0x03,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x2b,0x03,0x00,0x00, -0xa9,0x00,0x00,0x00,0x73,0x04,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x2c,0x03,0x00,0x00,0x1b,0x03,0x00,0x00, -0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x59,0x00,0x00,0x00, -0x2d,0x03,0x00,0x00,0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x2b,0x03,0x00,0x00,0x3e,0x00,0x03,0x00,0x2d,0x03,0x00,0x00, -0x2c,0x03,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x36,0x03,0x00,0x00,0x5a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0x38,0x03,0x00,0x00,0x60,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x39,0x03,0x00,0x00, -0x38,0x03,0x00,0x00,0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x3a,0x03,0x00,0x00,0x39,0x03,0x00,0x00,0x48,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0x3c,0x03,0x00,0x00, -0x65,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x3d,0x03,0x00,0x00,0x3c,0x03,0x00,0x00,0xc5,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x3e,0x03,0x00,0x00,0x3a,0x03,0x00,0x00, -0x3d,0x03,0x00,0x00,0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x3f,0x03,0x00,0x00,0x3e,0x03,0x00,0x00,0x74,0x04,0x00,0x00, -0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x40,0x03,0x00,0x00, 
-0x3f,0x03,0x00,0x00,0x6f,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x41,0x03,0x00,0x00,0x40,0x03,0x00,0x00, -0x51,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x42,0x03,0x00,0x00,0x41,0x03,0x00,0x00,0xc2,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x44,0x03,0x00,0x00,0x3e,0x03,0x00,0x00, -0x6d,0x04,0x00,0x00,0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x45,0x03,0x00,0x00,0x44,0x03,0x00,0x00,0x51,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x46,0x03,0x00,0x00, -0x45,0x03,0x00,0x00,0x41,0x00,0x08,0x00,0x7f,0x00,0x00,0x00, -0x48,0x03,0x00,0x00,0x57,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x74,0x04,0x00,0x00, -0x3d,0x00,0x04,0x00,0x50,0x00,0x00,0x00,0x49,0x03,0x00,0x00, -0x48,0x03,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x4a,0x03,0x00,0x00,0x49,0x03,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x4b,0x03,0x00,0x00,0x4a,0x03,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4c,0x03,0x00,0x00, -0x4b,0x03,0x00,0x00,0x88,0x00,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x4e,0x03,0x00,0x00,0x4c,0x03,0x00,0x00, -0x42,0x03,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x4f,0x03,0x00,0x00,0x4e,0x03,0x00,0x00,0xc2,0x00,0x05,0x00, -0x50,0x00,0x00,0x00,0x50,0x03,0x00,0x00,0x49,0x03,0x00,0x00, -0x6f,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x51,0x03,0x00,0x00,0x50,0x03,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x52,0x03,0x00,0x00,0x51,0x03,0x00,0x00, -0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x54,0x03,0x00,0x00, -0x52,0x03,0x00,0x00,0x46,0x03,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x55,0x03,0x00,0x00,0x54,0x03,0x00,0x00, -0x50,0x00,0x05,0x00,0x82,0x00,0x00,0x00,0x56,0x03,0x00,0x00, -0x4f,0x03,0x00,0x00,0x55,0x03,0x00,0x00,0x83,0x00,0x05,0x00, -0x82,0x00,0x00,0x00,0x57,0x03,0x00,0x00,0x56,0x03,0x00,0x00, -0xcd,0x00,0x00,0x00,0x8e,0x00,0x05,0x00,0x82,0x00,0x00,0x00, -0x58,0x03,0x00,0x00,0x57,0x03,0x00,0x00,0x36,0x03,0x00,0x00, 
-0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x5e,0x03,0x00,0x00, -0xa9,0x00,0x00,0x00,0x74,0x04,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x60,0x03,0x00,0x00,0x58,0x03,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x59,0x00,0x00,0x00, -0x61,0x03,0x00,0x00,0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x5e,0x03,0x00,0x00,0x3e,0x00,0x03,0x00,0x61,0x03,0x00,0x00, -0x60,0x03,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x68,0x03,0x00,0x00,0xa9,0x00,0x00,0x00,0x75,0x04,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x69,0x03,0x00,0x00, -0x58,0x03,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x59,0x00,0x00,0x00,0x6a,0x03,0x00,0x00,0xa1,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x68,0x03,0x00,0x00,0x3e,0x00,0x03,0x00, -0x6a,0x03,0x00,0x00,0x69,0x03,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x73,0x03,0x00,0x00,0x5a,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0x75,0x03,0x00,0x00, -0x60,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x76,0x03,0x00,0x00,0x75,0x03,0x00,0x00,0xc4,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x77,0x03,0x00,0x00,0x76,0x03,0x00,0x00, -0x48,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0x79,0x03,0x00,0x00,0x65,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x7a,0x03,0x00,0x00,0x79,0x03,0x00,0x00, -0xc5,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x7b,0x03,0x00,0x00, -0x77,0x03,0x00,0x00,0x7a,0x03,0x00,0x00,0xc2,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x7c,0x03,0x00,0x00,0x7b,0x03,0x00,0x00, -0x75,0x00,0x00,0x00,0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x7d,0x03,0x00,0x00,0x7c,0x03,0x00,0x00,0x6f,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x7e,0x03,0x00,0x00, -0x7d,0x03,0x00,0x00,0x51,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x7f,0x03,0x00,0x00,0x7e,0x03,0x00,0x00, -0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x81,0x03,0x00,0x00, -0x7b,0x03,0x00,0x00,0x6f,0x04,0x00,0x00,0xc7,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x82,0x03,0x00,0x00,0x81,0x03,0x00,0x00, 
-0x51,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x83,0x03,0x00,0x00,0x82,0x03,0x00,0x00,0x41,0x00,0x08,0x00, -0x7f,0x00,0x00,0x00,0x85,0x03,0x00,0x00,0x57,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x37,0x00,0x00,0x00, -0x75,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x50,0x00,0x00,0x00, -0x86,0x03,0x00,0x00,0x85,0x03,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x87,0x03,0x00,0x00,0x86,0x03,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x88,0x03,0x00,0x00, -0x87,0x03,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x89,0x03,0x00,0x00,0x88,0x03,0x00,0x00,0x88,0x00,0x00,0x00, -0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x8b,0x03,0x00,0x00, -0x89,0x03,0x00,0x00,0x7f,0x03,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x8c,0x03,0x00,0x00,0x8b,0x03,0x00,0x00, -0xc2,0x00,0x05,0x00,0x50,0x00,0x00,0x00,0x8d,0x03,0x00,0x00, -0x86,0x03,0x00,0x00,0x6f,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x8e,0x03,0x00,0x00,0x8d,0x03,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x8f,0x03,0x00,0x00, -0x8e,0x03,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x91,0x03,0x00,0x00,0x8f,0x03,0x00,0x00,0x83,0x03,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x92,0x03,0x00,0x00, -0x91,0x03,0x00,0x00,0x50,0x00,0x05,0x00,0x82,0x00,0x00,0x00, -0x93,0x03,0x00,0x00,0x8c,0x03,0x00,0x00,0x92,0x03,0x00,0x00, -0x83,0x00,0x05,0x00,0x82,0x00,0x00,0x00,0x94,0x03,0x00,0x00, -0x93,0x03,0x00,0x00,0xcd,0x00,0x00,0x00,0x8e,0x00,0x05,0x00, -0x82,0x00,0x00,0x00,0x95,0x03,0x00,0x00,0x94,0x03,0x00,0x00, -0x73,0x03,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x9b,0x03,0x00,0x00,0xa9,0x00,0x00,0x00,0x75,0x00,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x9d,0x03,0x00,0x00, -0x95,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x59,0x00,0x00,0x00,0x9e,0x03,0x00,0x00,0xa1,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x9b,0x03,0x00,0x00,0x3e,0x00,0x03,0x00, -0x9e,0x03,0x00,0x00,0x9d,0x03,0x00,0x00,0x80,0x00,0x05,0x00, 
-0x06,0x00,0x00,0x00,0xa5,0x03,0x00,0x00,0xa9,0x00,0x00,0x00, -0x76,0x04,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0xa6,0x03,0x00,0x00,0x95,0x03,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x59,0x00,0x00,0x00,0xa7,0x03,0x00,0x00, -0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xa5,0x03,0x00,0x00, -0x3e,0x00,0x03,0x00,0xa7,0x03,0x00,0x00,0xa6,0x03,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xb0,0x03,0x00,0x00, -0x5a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0xb2,0x03,0x00,0x00,0x60,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0xb3,0x03,0x00,0x00,0xb2,0x03,0x00,0x00, -0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0xb4,0x03,0x00,0x00, -0xb3,0x03,0x00,0x00,0x48,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0xb6,0x03,0x00,0x00,0x65,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0xb7,0x03,0x00,0x00, -0xb6,0x03,0x00,0x00,0xc5,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0xb8,0x03,0x00,0x00,0xb4,0x03,0x00,0x00,0xb7,0x03,0x00,0x00, -0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0xb9,0x03,0x00,0x00, -0xb8,0x03,0x00,0x00,0x62,0x04,0x00,0x00,0xc4,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0xba,0x03,0x00,0x00,0xb9,0x03,0x00,0x00, -0x6f,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0xbb,0x03,0x00,0x00,0xba,0x03,0x00,0x00,0x51,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xbc,0x03,0x00,0x00, -0xbb,0x03,0x00,0x00,0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0xbe,0x03,0x00,0x00,0xb8,0x03,0x00,0x00,0x71,0x04,0x00,0x00, -0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0xbf,0x03,0x00,0x00, -0xbe,0x03,0x00,0x00,0x51,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xc0,0x03,0x00,0x00,0xbf,0x03,0x00,0x00, -0x41,0x00,0x08,0x00,0x7f,0x00,0x00,0x00,0xc2,0x03,0x00,0x00, -0x57,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x62,0x04,0x00,0x00,0x3d,0x00,0x04,0x00, -0x50,0x00,0x00,0x00,0xc3,0x03,0x00,0x00,0xc2,0x03,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0xc4,0x03,0x00,0x00, 
-0xc3,0x03,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xc5,0x03,0x00,0x00,0xc4,0x03,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xc6,0x03,0x00,0x00,0xc5,0x03,0x00,0x00, -0x88,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xc8,0x03,0x00,0x00,0xc6,0x03,0x00,0x00,0xbc,0x03,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xc9,0x03,0x00,0x00, -0xc8,0x03,0x00,0x00,0xc2,0x00,0x05,0x00,0x50,0x00,0x00,0x00, -0xca,0x03,0x00,0x00,0xc3,0x03,0x00,0x00,0x6f,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0xcb,0x03,0x00,0x00, -0xca,0x03,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xcc,0x03,0x00,0x00,0xcb,0x03,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xce,0x03,0x00,0x00,0xcc,0x03,0x00,0x00, -0xc0,0x03,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xcf,0x03,0x00,0x00,0xce,0x03,0x00,0x00,0x50,0x00,0x05,0x00, -0x82,0x00,0x00,0x00,0xd0,0x03,0x00,0x00,0xc9,0x03,0x00,0x00, -0xcf,0x03,0x00,0x00,0x83,0x00,0x05,0x00,0x82,0x00,0x00,0x00, -0xd1,0x03,0x00,0x00,0xd0,0x03,0x00,0x00,0xcd,0x00,0x00,0x00, -0x8e,0x00,0x05,0x00,0x82,0x00,0x00,0x00,0xd2,0x03,0x00,0x00, -0xd1,0x03,0x00,0x00,0xb0,0x03,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xd8,0x03,0x00,0x00,0xa9,0x00,0x00,0x00, -0x62,0x04,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0xda,0x03,0x00,0x00,0xd2,0x03,0x00,0x00,0x00,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x59,0x00,0x00,0x00,0xdb,0x03,0x00,0x00, -0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xd8,0x03,0x00,0x00, -0x3e,0x00,0x03,0x00,0xdb,0x03,0x00,0x00,0xda,0x03,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xe2,0x03,0x00,0x00, -0xa9,0x00,0x00,0x00,0x77,0x04,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0xe3,0x03,0x00,0x00,0xd2,0x03,0x00,0x00, -0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x59,0x00,0x00,0x00, -0xe4,0x03,0x00,0x00,0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xe2,0x03,0x00,0x00,0x3e,0x00,0x03,0x00,0xe4,0x03,0x00,0x00, -0xe3,0x03,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, 
-0xed,0x03,0x00,0x00,0x5a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0xef,0x03,0x00,0x00,0x60,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0xf0,0x03,0x00,0x00, -0xef,0x03,0x00,0x00,0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0xf1,0x03,0x00,0x00,0xf0,0x03,0x00,0x00,0x48,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0xf3,0x03,0x00,0x00, -0x65,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0xf4,0x03,0x00,0x00,0xf3,0x03,0x00,0x00,0xc5,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0xf5,0x03,0x00,0x00,0xf1,0x03,0x00,0x00, -0xf4,0x03,0x00,0x00,0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0xf6,0x03,0x00,0x00,0xf5,0x03,0x00,0x00,0x64,0x04,0x00,0x00, -0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0xf7,0x03,0x00,0x00, -0xf6,0x03,0x00,0x00,0x6f,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0xf8,0x03,0x00,0x00,0xf7,0x03,0x00,0x00, -0x51,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xf9,0x03,0x00,0x00,0xf8,0x03,0x00,0x00,0xc2,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0xfb,0x03,0x00,0x00,0xf5,0x03,0x00,0x00, -0x73,0x04,0x00,0x00,0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0xfc,0x03,0x00,0x00,0xfb,0x03,0x00,0x00,0x51,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xfd,0x03,0x00,0x00, -0xfc,0x03,0x00,0x00,0x41,0x00,0x08,0x00,0x7f,0x00,0x00,0x00, -0xff,0x03,0x00,0x00,0x57,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x64,0x04,0x00,0x00, -0x3d,0x00,0x04,0x00,0x50,0x00,0x00,0x00,0x00,0x04,0x00,0x00, -0xff,0x03,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x01,0x04,0x00,0x00,0x00,0x04,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x02,0x04,0x00,0x00,0x01,0x04,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x03,0x04,0x00,0x00, -0x02,0x04,0x00,0x00,0x88,0x00,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x05,0x04,0x00,0x00,0x03,0x04,0x00,0x00, -0xf9,0x03,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x06,0x04,0x00,0x00,0x05,0x04,0x00,0x00,0xc2,0x00,0x05,0x00, 
-0x50,0x00,0x00,0x00,0x07,0x04,0x00,0x00,0x00,0x04,0x00,0x00, -0x6f,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x08,0x04,0x00,0x00,0x07,0x04,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x09,0x04,0x00,0x00,0x08,0x04,0x00,0x00, -0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x0b,0x04,0x00,0x00, -0x09,0x04,0x00,0x00,0xfd,0x03,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x0c,0x04,0x00,0x00,0x0b,0x04,0x00,0x00, -0x50,0x00,0x05,0x00,0x82,0x00,0x00,0x00,0x0d,0x04,0x00,0x00, -0x06,0x04,0x00,0x00,0x0c,0x04,0x00,0x00,0x83,0x00,0x05,0x00, -0x82,0x00,0x00,0x00,0x0e,0x04,0x00,0x00,0x0d,0x04,0x00,0x00, -0xcd,0x00,0x00,0x00,0x8e,0x00,0x05,0x00,0x82,0x00,0x00,0x00, -0x0f,0x04,0x00,0x00,0x0e,0x04,0x00,0x00,0xed,0x03,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x15,0x04,0x00,0x00, -0xa9,0x00,0x00,0x00,0x64,0x04,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x17,0x04,0x00,0x00,0x0f,0x04,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x59,0x00,0x00,0x00, -0x18,0x04,0x00,0x00,0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x15,0x04,0x00,0x00,0x3e,0x00,0x03,0x00,0x18,0x04,0x00,0x00, -0x17,0x04,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x1f,0x04,0x00,0x00,0xa9,0x00,0x00,0x00,0x78,0x04,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x20,0x04,0x00,0x00, -0x0f,0x04,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x59,0x00,0x00,0x00,0x21,0x04,0x00,0x00,0xa1,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x1f,0x04,0x00,0x00,0x3e,0x00,0x03,0x00, -0x21,0x04,0x00,0x00,0x20,0x04,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x2a,0x04,0x00,0x00,0x5a,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0x2c,0x04,0x00,0x00, -0x60,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x2d,0x04,0x00,0x00,0x2c,0x04,0x00,0x00,0xc4,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x2e,0x04,0x00,0x00,0x2d,0x04,0x00,0x00, -0x48,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0x30,0x04,0x00,0x00,0x65,0x00,0x00,0x00,0x71,0x00,0x04,0x00, 
-0x09,0x00,0x00,0x00,0x31,0x04,0x00,0x00,0x30,0x04,0x00,0x00, -0xc5,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x32,0x04,0x00,0x00, -0x2e,0x04,0x00,0x00,0x31,0x04,0x00,0x00,0xc2,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x33,0x04,0x00,0x00,0x32,0x04,0x00,0x00, -0x88,0x00,0x00,0x00,0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x34,0x04,0x00,0x00,0x33,0x04,0x00,0x00,0x6f,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x35,0x04,0x00,0x00, -0x34,0x04,0x00,0x00,0x51,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x36,0x04,0x00,0x00,0x35,0x04,0x00,0x00, -0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x38,0x04,0x00,0x00, -0x32,0x04,0x00,0x00,0x75,0x04,0x00,0x00,0xc7,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x39,0x04,0x00,0x00,0x38,0x04,0x00,0x00, -0x51,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x3a,0x04,0x00,0x00,0x39,0x04,0x00,0x00,0x41,0x00,0x08,0x00, -0x7f,0x00,0x00,0x00,0x3c,0x04,0x00,0x00,0x57,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x37,0x00,0x00,0x00, -0x88,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x50,0x00,0x00,0x00, -0x3d,0x04,0x00,0x00,0x3c,0x04,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x3e,0x04,0x00,0x00,0x3d,0x04,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x3f,0x04,0x00,0x00, -0x3e,0x04,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x40,0x04,0x00,0x00,0x3f,0x04,0x00,0x00,0x88,0x00,0x00,0x00, -0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x42,0x04,0x00,0x00, -0x40,0x04,0x00,0x00,0x36,0x04,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x43,0x04,0x00,0x00,0x42,0x04,0x00,0x00, -0xc2,0x00,0x05,0x00,0x50,0x00,0x00,0x00,0x44,0x04,0x00,0x00, -0x3d,0x04,0x00,0x00,0x6f,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x45,0x04,0x00,0x00,0x44,0x04,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x46,0x04,0x00,0x00, -0x45,0x04,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x48,0x04,0x00,0x00,0x46,0x04,0x00,0x00,0x3a,0x04,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x49,0x04,0x00,0x00, 
-0x48,0x04,0x00,0x00,0x50,0x00,0x05,0x00,0x82,0x00,0x00,0x00, -0x4a,0x04,0x00,0x00,0x43,0x04,0x00,0x00,0x49,0x04,0x00,0x00, -0x83,0x00,0x05,0x00,0x82,0x00,0x00,0x00,0x4b,0x04,0x00,0x00, -0x4a,0x04,0x00,0x00,0xcd,0x00,0x00,0x00,0x8e,0x00,0x05,0x00, -0x82,0x00,0x00,0x00,0x4c,0x04,0x00,0x00,0x4b,0x04,0x00,0x00, -0x2a,0x04,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x52,0x04,0x00,0x00,0xa9,0x00,0x00,0x00,0x88,0x00,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x54,0x04,0x00,0x00, -0x4c,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x59,0x00,0x00,0x00,0x55,0x04,0x00,0x00,0xa1,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x52,0x04,0x00,0x00,0x3e,0x00,0x03,0x00, -0x55,0x04,0x00,0x00,0x54,0x04,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x5c,0x04,0x00,0x00,0xa9,0x00,0x00,0x00, -0x79,0x04,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0x5d,0x04,0x00,0x00,0x4c,0x04,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x59,0x00,0x00,0x00,0x5e,0x04,0x00,0x00, -0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x5c,0x04,0x00,0x00, -0x3e,0x00,0x03,0x00,0x5e,0x04,0x00,0x00,0x5d,0x04,0x00,0x00, -0xf9,0x00,0x02,0x00,0xc1,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0xc1,0x00,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, - -}; -const uint64_t dequant_q5_0_len = 13428; - -unsigned char dequant_q5_0_fp32_data[] = { -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, 0x9b,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, 0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00,0x0b,0x00,0x06,0x00, @@ -8960,1079 +5015,10 @@ unsigned char dequant_q5_0_fp32_data[] = { 0xc3,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0xc3,0x00,0x00,0x00, 0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, }; -const uint64_t dequant_q5_0_fp32_len = 13952; +const uint64_t dequant_q5_0_len = 13952; unsigned char dequant_q5_1_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, 
-0x63,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, -0x11,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x51,0x11,0x00,0x00,0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00, -0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, -0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, -0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x0f,0x00,0x09,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x9b,0x00,0x00,0x00, -0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x00,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x0c,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x14,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x14,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x14,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x14,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x14,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x4f,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x50,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x50,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x50,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x50,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x08,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x51,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x52,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00, 
-0x52,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x52,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x54,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x54,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x98,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x99,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x99,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x99,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x9b,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x9b,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0xb9,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00, -0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00, -0x0a,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x0b,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x0a,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x0b,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x0d,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x0e,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x1e,0x00,0x06,0x00,0x14,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x15,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x15,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x18,0x00,0x00,0x00, 
-0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x14,0x00,0x02,0x00,0x24,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x37,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x48,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x16,0x00,0x03,0x00, -0x4a,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x15,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x4e,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x1c,0x00,0x04,0x00,0x4f,0x00,0x00,0x00, -0x4d,0x00,0x00,0x00,0x4e,0x00,0x00,0x00,0x1e,0x00,0x06,0x00, -0x50,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, -0x51,0x00,0x00,0x00,0x50,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0x52,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x53,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x52,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x54,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x56,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x61,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x66,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x6e,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x79,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x4d,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x7c,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x82,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x8c,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x98,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x99,0x00,0x00,0x00, 
-0x98,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x9a,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x99,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x9a,0x00,0x00,0x00,0x9b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0xb8,0x00,0x00,0x00, -0x00,0x01,0x00,0x00,0x2c,0x00,0x06,0x00,0x0a,0x00,0x00,0x00, -0xb9,0x00,0x00,0x00,0xb8,0x00,0x00,0x00,0x8c,0x00,0x00,0x00, -0x8c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x4b,0x04,0x00,0x00,0x0d,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x4c,0x04,0x00,0x00,0x11,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x4d,0x04,0x00,0x00, -0x0e,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x4e,0x04,0x00,0x00,0x12,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x4f,0x04,0x00,0x00,0x13,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x50,0x04,0x00,0x00, -0x14,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x51,0x04,0x00,0x00,0x05,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x52,0x04,0x00,0x00,0x15,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x53,0x04,0x00,0x00, -0x06,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x54,0x04,0x00,0x00,0x16,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x55,0x04,0x00,0x00,0x07,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x56,0x04,0x00,0x00, -0x17,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x57,0x04,0x00,0x00,0x08,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x58,0x04,0x00,0x00,0x18,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x59,0x04,0x00,0x00, -0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x5a,0x04,0x00,0x00,0x19,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x5b,0x04,0x00,0x00,0x0a,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x5c,0x04,0x00,0x00, -0x1a,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x5d,0x04,0x00,0x00,0x0b,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, 
-0x06,0x00,0x00,0x00,0x5e,0x04,0x00,0x00,0x1b,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x5f,0x04,0x00,0x00, -0x1c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x60,0x04,0x00,0x00,0x1d,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x61,0x04,0x00,0x00,0x1e,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x62,0x04,0x00,0x00, -0x1f,0x00,0x00,0x00,0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, -0xba,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00, -0x0d,0x00,0x00,0x00,0xbb,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0xbb,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x0e,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x0d,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x18,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x1a,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x87,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x1b,0x00,0x00,0x00,0x8b,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x1d,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x26,0x00,0x00,0x00,0x1d,0x00,0x00,0x00, -0x1b,0x00,0x00,0x00,0xaf,0x00,0x05,0x00,0x24,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0x26,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0xa8,0x00,0x04,0x00,0x24,0x00,0x00,0x00,0x2a,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x2c,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x2a,0x00,0x00,0x00, -0x2b,0x00,0x00,0x00,0x2c,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x2b,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x18,0x00,0x00,0x00, 
-0x2f,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x30,0x00,0x00,0x00, -0x2f,0x00,0x00,0x00,0xaf,0x00,0x05,0x00,0x24,0x00,0x00,0x00, -0x31,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x30,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0x2c,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x2c,0x00,0x00,0x00,0xf5,0x00,0x07,0x00,0x24,0x00,0x00,0x00, -0x32,0x00,0x00,0x00,0x29,0x00,0x00,0x00,0xbb,0x00,0x00,0x00, -0x31,0x00,0x00,0x00,0x2b,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, -0x34,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00, -0x32,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x34,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x33,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0xba,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x34,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x18,0x00,0x00,0x00,0x38,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x39,0x00,0x00,0x00,0x38,0x00,0x00,0x00, -0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x39,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3e,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x3e,0x00,0x00,0x00,0x1d,0x00,0x00,0x00, -0x41,0x00,0x07,0x00,0x56,0x00,0x00,0x00,0x57,0x00,0x00,0x00, -0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x58,0x00,0x00,0x00,0x57,0x00,0x00,0x00,0x41,0x00,0x07,0x00, -0x56,0x00,0x00,0x00,0x5b,0x00,0x00,0x00,0x54,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x5c,0x00,0x00,0x00, -0x5b,0x00,0x00,0x00,0x41,0x00,0x07,0x00,0x61,0x00,0x00,0x00, -0x62,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x63,0x00,0x00,0x00,0x62,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x65,0x00,0x00,0x00, 
-0x63,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xc4,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x67,0x00,0x00,0x00,0x65,0x00,0x00,0x00, -0x66,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x68,0x00,0x00,0x00,0x67,0x00,0x00,0x00,0x4e,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x69,0x00,0x00,0x00, -0x68,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x6c,0x00,0x00,0x00,0x62,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x70,0x00,0x00,0x00,0x6c,0x00,0x00,0x00, -0x6e,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x71,0x00,0x00,0x00,0x70,0x00,0x00,0x00,0x4e,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x72,0x00,0x00,0x00, -0x71,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x79,0x00,0x00,0x00, -0x7a,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0x7b,0x00,0x00,0x00, -0x7a,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x80,0x00,0x00,0x00,0x7b,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x81,0x00,0x00,0x00,0x80,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x83,0x00,0x00,0x00, -0x81,0x00,0x00,0x00,0x82,0x00,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x86,0x00,0x00,0x00,0x83,0x00,0x00,0x00, -0x69,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x87,0x00,0x00,0x00,0x86,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x4d,0x00,0x00,0x00,0x89,0x00,0x00,0x00,0x7b,0x00,0x00,0x00, -0x66,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x8a,0x00,0x00,0x00,0x89,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x8b,0x00,0x00,0x00,0x8a,0x00,0x00,0x00, -0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x8f,0x00,0x00,0x00, -0x8b,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x90,0x00,0x00,0x00,0x8f,0x00,0x00,0x00, -0x50,0x00,0x05,0x00,0x7c,0x00,0x00,0x00,0x91,0x00,0x00,0x00, -0x87,0x00,0x00,0x00,0x90,0x00,0x00,0x00,0x8e,0x00,0x05,0x00, 
-0x7c,0x00,0x00,0x00,0x94,0x00,0x00,0x00,0x91,0x00,0x00,0x00, -0x58,0x00,0x00,0x00,0x50,0x00,0x05,0x00,0x7c,0x00,0x00,0x00, -0x96,0x00,0x00,0x00,0x5c,0x00,0x00,0x00,0x5c,0x00,0x00,0x00, -0x81,0x00,0x05,0x00,0x7c,0x00,0x00,0x00,0x97,0x00,0x00,0x00, -0x94,0x00,0x00,0x00,0x96,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x18,0x00,0x00,0x00,0x9d,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x77,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x9e,0x00,0x00,0x00,0x9d,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x9f,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x9e,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xa2,0x00,0x00,0x00,0x9f,0x00,0x00,0x00,0x26,0x00,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0xa7,0x00,0x00,0x00, -0x97,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0xa8,0x00,0x00,0x00,0x9b,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xa2,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0xa8,0x00,0x00,0x00,0xa7,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xb2,0x00,0x00,0x00,0xa2,0x00,0x00,0x00, -0x48,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0xb4,0x00,0x00,0x00,0x97,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0xb5,0x00,0x00,0x00, -0x9b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xb2,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0xb5,0x00,0x00,0x00,0xb4,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xcc,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xce,0x00,0x00,0x00,0x5b,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0xd0,0x00,0x00,0x00,0x62,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0xd1,0x00,0x00,0x00, -0xd0,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0xc4,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0xd2,0x00,0x00,0x00,0xd1,0x00,0x00,0x00, -0x66,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0xd3,0x00,0x00,0x00,0xd2,0x00,0x00,0x00,0x4e,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xd4,0x00,0x00,0x00, 
-0xd3,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0xd6,0x00,0x00,0x00,0x62,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0xd8,0x00,0x00,0x00,0xd6,0x00,0x00,0x00, -0x4b,0x04,0x00,0x00,0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0xd9,0x00,0x00,0x00,0xd8,0x00,0x00,0x00,0x4e,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xda,0x00,0x00,0x00, -0xd9,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x79,0x00,0x00,0x00, -0xdc,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0xdd,0x00,0x00,0x00, -0xdc,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0xde,0x00,0x00,0x00,0xdd,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xdf,0x00,0x00,0x00,0xde,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xe0,0x00,0x00,0x00, -0xdf,0x00,0x00,0x00,0x82,0x00,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xe2,0x00,0x00,0x00,0xe0,0x00,0x00,0x00, -0xd4,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xe3,0x00,0x00,0x00,0xe2,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x4d,0x00,0x00,0x00,0xe4,0x00,0x00,0x00,0xdd,0x00,0x00,0x00, -0x66,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0xe5,0x00,0x00,0x00,0xe4,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xe6,0x00,0x00,0x00,0xe5,0x00,0x00,0x00, -0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xe8,0x00,0x00,0x00, -0xe6,0x00,0x00,0x00,0xda,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xe9,0x00,0x00,0x00,0xe8,0x00,0x00,0x00, -0x50,0x00,0x05,0x00,0x7c,0x00,0x00,0x00,0xea,0x00,0x00,0x00, -0xe3,0x00,0x00,0x00,0xe9,0x00,0x00,0x00,0x8e,0x00,0x05,0x00, -0x7c,0x00,0x00,0x00,0xeb,0x00,0x00,0x00,0xea,0x00,0x00,0x00, -0xcc,0x00,0x00,0x00,0x50,0x00,0x05,0x00,0x7c,0x00,0x00,0x00, -0xec,0x00,0x00,0x00,0xce,0x00,0x00,0x00,0xce,0x00,0x00,0x00, -0x81,0x00,0x05,0x00,0x7c,0x00,0x00,0x00,0xed,0x00,0x00,0x00, -0xeb,0x00,0x00,0x00,0xec,0x00,0x00,0x00,0x80,0x00,0x05,0x00, 
-0x06,0x00,0x00,0x00,0xf3,0x00,0x00,0x00,0xa2,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0xf5,0x00,0x00,0x00,0xed,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0xf6,0x00,0x00,0x00, -0x9b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xf3,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0xf6,0x00,0x00,0x00,0xf5,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xfd,0x00,0x00,0x00, -0xa2,0x00,0x00,0x00,0x4c,0x04,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0xfe,0x00,0x00,0x00,0xed,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0xff,0x00,0x00,0x00,0x9b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xfd,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0xff,0x00,0x00,0x00, -0xfe,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x08,0x01,0x00,0x00,0x57,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x0a,0x01,0x00,0x00,0x5b,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x0c,0x01,0x00,0x00, -0x62,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x0d,0x01,0x00,0x00,0x0c,0x01,0x00,0x00,0x37,0x00,0x00,0x00, -0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x0e,0x01,0x00,0x00, -0x0d,0x01,0x00,0x00,0x66,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x0f,0x01,0x00,0x00,0x0e,0x01,0x00,0x00, -0x4e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x10,0x01,0x00,0x00,0x0f,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x12,0x01,0x00,0x00,0x62,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x14,0x01,0x00,0x00, -0x12,0x01,0x00,0x00,0x4d,0x04,0x00,0x00,0xc7,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x15,0x01,0x00,0x00,0x14,0x01,0x00,0x00, -0x4e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x16,0x01,0x00,0x00,0x15,0x01,0x00,0x00,0x41,0x00,0x08,0x00, -0x79,0x00,0x00,0x00,0x18,0x01,0x00,0x00,0x54,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x77,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, 
-0x19,0x01,0x00,0x00,0x18,0x01,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x1a,0x01,0x00,0x00,0x19,0x01,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x1b,0x01,0x00,0x00, -0x1a,0x01,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x1c,0x01,0x00,0x00,0x1b,0x01,0x00,0x00,0x82,0x00,0x00,0x00, -0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x1e,0x01,0x00,0x00, -0x1c,0x01,0x00,0x00,0x10,0x01,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x1f,0x01,0x00,0x00,0x1e,0x01,0x00,0x00, -0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00,0x20,0x01,0x00,0x00, -0x19,0x01,0x00,0x00,0x66,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x21,0x01,0x00,0x00,0x20,0x01,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x22,0x01,0x00,0x00, -0x21,0x01,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x24,0x01,0x00,0x00,0x22,0x01,0x00,0x00,0x16,0x01,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x25,0x01,0x00,0x00, -0x24,0x01,0x00,0x00,0x50,0x00,0x05,0x00,0x7c,0x00,0x00,0x00, -0x26,0x01,0x00,0x00,0x1f,0x01,0x00,0x00,0x25,0x01,0x00,0x00, -0x8e,0x00,0x05,0x00,0x7c,0x00,0x00,0x00,0x27,0x01,0x00,0x00, -0x26,0x01,0x00,0x00,0x08,0x01,0x00,0x00,0x50,0x00,0x05,0x00, -0x7c,0x00,0x00,0x00,0x28,0x01,0x00,0x00,0x0a,0x01,0x00,0x00, -0x0a,0x01,0x00,0x00,0x81,0x00,0x05,0x00,0x7c,0x00,0x00,0x00, -0x29,0x01,0x00,0x00,0x27,0x01,0x00,0x00,0x28,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x2f,0x01,0x00,0x00, -0xa2,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x31,0x01,0x00,0x00,0x29,0x01,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0x32,0x01,0x00,0x00,0x9b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x2f,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0x32,0x01,0x00,0x00, -0x31,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x39,0x01,0x00,0x00,0xa2,0x00,0x00,0x00,0x4e,0x04,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x3a,0x01,0x00,0x00, -0x29,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, 
-0x56,0x00,0x00,0x00,0x3b,0x01,0x00,0x00,0x9b,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x39,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0x3b,0x01,0x00,0x00,0x3a,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x44,0x01,0x00,0x00,0x57,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x46,0x01,0x00,0x00, -0x5b,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x48,0x01,0x00,0x00,0x62,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x49,0x01,0x00,0x00,0x48,0x01,0x00,0x00, -0x77,0x00,0x00,0x00,0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x4a,0x01,0x00,0x00,0x49,0x01,0x00,0x00,0x66,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x4b,0x01,0x00,0x00, -0x4a,0x01,0x00,0x00,0x4e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x4c,0x01,0x00,0x00,0x4b,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x4e,0x01,0x00,0x00, -0x62,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x50,0x01,0x00,0x00,0x4e,0x01,0x00,0x00,0x82,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x51,0x01,0x00,0x00, -0x50,0x01,0x00,0x00,0x4e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x52,0x01,0x00,0x00,0x51,0x01,0x00,0x00, -0x41,0x00,0x08,0x00,0x79,0x00,0x00,0x00,0x54,0x01,0x00,0x00, -0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x77,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0x55,0x01,0x00,0x00,0x54,0x01,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x56,0x01,0x00,0x00, -0x55,0x01,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x57,0x01,0x00,0x00,0x56,0x01,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x58,0x01,0x00,0x00,0x57,0x01,0x00,0x00, -0x82,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x5a,0x01,0x00,0x00,0x58,0x01,0x00,0x00,0x4c,0x01,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x5b,0x01,0x00,0x00, -0x5a,0x01,0x00,0x00,0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00, -0x5c,0x01,0x00,0x00,0x55,0x01,0x00,0x00,0x66,0x00,0x00,0x00, 
-0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x5d,0x01,0x00,0x00, -0x5c,0x01,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x5e,0x01,0x00,0x00,0x5d,0x01,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x60,0x01,0x00,0x00,0x5e,0x01,0x00,0x00, -0x52,0x01,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x61,0x01,0x00,0x00,0x60,0x01,0x00,0x00,0x50,0x00,0x05,0x00, -0x7c,0x00,0x00,0x00,0x62,0x01,0x00,0x00,0x5b,0x01,0x00,0x00, -0x61,0x01,0x00,0x00,0x8e,0x00,0x05,0x00,0x7c,0x00,0x00,0x00, -0x63,0x01,0x00,0x00,0x62,0x01,0x00,0x00,0x44,0x01,0x00,0x00, -0x50,0x00,0x05,0x00,0x7c,0x00,0x00,0x00,0x64,0x01,0x00,0x00, -0x46,0x01,0x00,0x00,0x46,0x01,0x00,0x00,0x81,0x00,0x05,0x00, -0x7c,0x00,0x00,0x00,0x65,0x01,0x00,0x00,0x63,0x01,0x00,0x00, -0x64,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x6b,0x01,0x00,0x00,0xa2,0x00,0x00,0x00,0x77,0x00,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x6d,0x01,0x00,0x00, -0x65,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0x6e,0x01,0x00,0x00,0x9b,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x6b,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0x6e,0x01,0x00,0x00,0x6d,0x01,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x75,0x01,0x00,0x00,0xa2,0x00,0x00,0x00, -0x4f,0x04,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0x76,0x01,0x00,0x00,0x65,0x01,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0x77,0x01,0x00,0x00, -0x9b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x75,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0x77,0x01,0x00,0x00,0x76,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x80,0x01,0x00,0x00, -0x57,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x82,0x01,0x00,0x00,0x5b,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x84,0x01,0x00,0x00,0x62,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x85,0x01,0x00,0x00, -0x84,0x01,0x00,0x00,0x66,0x00,0x00,0x00,0xc4,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x86,0x01,0x00,0x00,0x85,0x01,0x00,0x00, 
-0x66,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x87,0x01,0x00,0x00,0x86,0x01,0x00,0x00,0x4e,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x88,0x01,0x00,0x00, -0x87,0x01,0x00,0x00,0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x8a,0x01,0x00,0x00,0x62,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x8c,0x01,0x00,0x00,0x8a,0x01,0x00,0x00, -0x48,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x8d,0x01,0x00,0x00,0x8c,0x01,0x00,0x00,0x4e,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x8e,0x01,0x00,0x00, -0x8d,0x01,0x00,0x00,0x41,0x00,0x08,0x00,0x79,0x00,0x00,0x00, -0x90,0x01,0x00,0x00,0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x66,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0x91,0x01,0x00,0x00, -0x90,0x01,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x92,0x01,0x00,0x00,0x91,0x01,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x93,0x01,0x00,0x00,0x92,0x01,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x94,0x01,0x00,0x00, -0x93,0x01,0x00,0x00,0x82,0x00,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x96,0x01,0x00,0x00,0x94,0x01,0x00,0x00, -0x88,0x01,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x97,0x01,0x00,0x00,0x96,0x01,0x00,0x00,0xc2,0x00,0x05,0x00, -0x4d,0x00,0x00,0x00,0x98,0x01,0x00,0x00,0x91,0x01,0x00,0x00, -0x66,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x99,0x01,0x00,0x00,0x98,0x01,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x9a,0x01,0x00,0x00,0x99,0x01,0x00,0x00, -0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x9c,0x01,0x00,0x00, -0x9a,0x01,0x00,0x00,0x8e,0x01,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x9d,0x01,0x00,0x00,0x9c,0x01,0x00,0x00, -0x50,0x00,0x05,0x00,0x7c,0x00,0x00,0x00,0x9e,0x01,0x00,0x00, -0x97,0x01,0x00,0x00,0x9d,0x01,0x00,0x00,0x8e,0x00,0x05,0x00, -0x7c,0x00,0x00,0x00,0x9f,0x01,0x00,0x00,0x9e,0x01,0x00,0x00, -0x80,0x01,0x00,0x00,0x50,0x00,0x05,0x00,0x7c,0x00,0x00,0x00, 
-0xa0,0x01,0x00,0x00,0x82,0x01,0x00,0x00,0x82,0x01,0x00,0x00, -0x81,0x00,0x05,0x00,0x7c,0x00,0x00,0x00,0xa1,0x01,0x00,0x00, -0x9f,0x01,0x00,0x00,0xa0,0x01,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xa7,0x01,0x00,0x00,0xa2,0x00,0x00,0x00, -0x66,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0xa9,0x01,0x00,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0xaa,0x01,0x00,0x00, -0x9b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xa7,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0xaa,0x01,0x00,0x00,0xa9,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xb1,0x01,0x00,0x00, -0xa2,0x00,0x00,0x00,0x50,0x04,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0xb2,0x01,0x00,0x00,0xa1,0x01,0x00,0x00, -0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0xb3,0x01,0x00,0x00,0x9b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xb1,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0xb3,0x01,0x00,0x00, -0xb2,0x01,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xbc,0x01,0x00,0x00,0x57,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xbe,0x01,0x00,0x00,0x5b,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0xc0,0x01,0x00,0x00, -0x62,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0xc1,0x01,0x00,0x00,0xc0,0x01,0x00,0x00,0x51,0x04,0x00,0x00, -0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0xc2,0x01,0x00,0x00, -0xc1,0x01,0x00,0x00,0x66,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0xc3,0x01,0x00,0x00,0xc2,0x01,0x00,0x00, -0x4e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xc4,0x01,0x00,0x00,0xc3,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0xc6,0x01,0x00,0x00,0x62,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0xc8,0x01,0x00,0x00, -0xc6,0x01,0x00,0x00,0x4c,0x04,0x00,0x00,0xc7,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0xc9,0x01,0x00,0x00,0xc8,0x01,0x00,0x00, -0x4e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xca,0x01,0x00,0x00,0xc9,0x01,0x00,0x00,0x41,0x00,0x08,0x00, 
-0x79,0x00,0x00,0x00,0xcc,0x01,0x00,0x00,0x54,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x77,0x00,0x00,0x00, -0x51,0x04,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0xcd,0x01,0x00,0x00,0xcc,0x01,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0xce,0x01,0x00,0x00,0xcd,0x01,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xcf,0x01,0x00,0x00, -0xce,0x01,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xd0,0x01,0x00,0x00,0xcf,0x01,0x00,0x00,0x82,0x00,0x00,0x00, -0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xd2,0x01,0x00,0x00, -0xd0,0x01,0x00,0x00,0xc4,0x01,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xd3,0x01,0x00,0x00,0xd2,0x01,0x00,0x00, -0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00,0xd4,0x01,0x00,0x00, -0xcd,0x01,0x00,0x00,0x66,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0xd5,0x01,0x00,0x00,0xd4,0x01,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xd6,0x01,0x00,0x00, -0xd5,0x01,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xd8,0x01,0x00,0x00,0xd6,0x01,0x00,0x00,0xca,0x01,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xd9,0x01,0x00,0x00, -0xd8,0x01,0x00,0x00,0x50,0x00,0x05,0x00,0x7c,0x00,0x00,0x00, -0xda,0x01,0x00,0x00,0xd3,0x01,0x00,0x00,0xd9,0x01,0x00,0x00, -0x8e,0x00,0x05,0x00,0x7c,0x00,0x00,0x00,0xdb,0x01,0x00,0x00, -0xda,0x01,0x00,0x00,0xbc,0x01,0x00,0x00,0x50,0x00,0x05,0x00, -0x7c,0x00,0x00,0x00,0xdc,0x01,0x00,0x00,0xbe,0x01,0x00,0x00, -0xbe,0x01,0x00,0x00,0x81,0x00,0x05,0x00,0x7c,0x00,0x00,0x00, -0xdd,0x01,0x00,0x00,0xdb,0x01,0x00,0x00,0xdc,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xe3,0x01,0x00,0x00, -0xa2,0x00,0x00,0x00,0x51,0x04,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0xe5,0x01,0x00,0x00,0xdd,0x01,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0xe6,0x01,0x00,0x00,0x9b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xe3,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0xe6,0x01,0x00,0x00, -0xe5,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, 
-0xed,0x01,0x00,0x00,0xa2,0x00,0x00,0x00,0x52,0x04,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0xee,0x01,0x00,0x00, -0xdd,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0xef,0x01,0x00,0x00,0x9b,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xed,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0xef,0x01,0x00,0x00,0xee,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xf8,0x01,0x00,0x00,0x57,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xfa,0x01,0x00,0x00, -0x5b,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0xfc,0x01,0x00,0x00,0x62,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0xfd,0x01,0x00,0x00,0xfc,0x01,0x00,0x00, -0x53,0x04,0x00,0x00,0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0xfe,0x01,0x00,0x00,0xfd,0x01,0x00,0x00,0x66,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0xff,0x01,0x00,0x00, -0xfe,0x01,0x00,0x00,0x4e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0xff,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x02,0x02,0x00,0x00, -0x62,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x04,0x02,0x00,0x00,0x02,0x02,0x00,0x00,0x4e,0x04,0x00,0x00, -0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x05,0x02,0x00,0x00, -0x04,0x02,0x00,0x00,0x4e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x06,0x02,0x00,0x00,0x05,0x02,0x00,0x00, -0x41,0x00,0x08,0x00,0x79,0x00,0x00,0x00,0x08,0x02,0x00,0x00, -0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x77,0x00,0x00,0x00,0x53,0x04,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0x09,0x02,0x00,0x00,0x08,0x02,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x0a,0x02,0x00,0x00, -0x09,0x02,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x0b,0x02,0x00,0x00,0x0a,0x02,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x0c,0x02,0x00,0x00,0x0b,0x02,0x00,0x00, -0x82,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x0e,0x02,0x00,0x00,0x0c,0x02,0x00,0x00,0x00,0x02,0x00,0x00, 
-0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x0f,0x02,0x00,0x00, -0x0e,0x02,0x00,0x00,0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00, -0x10,0x02,0x00,0x00,0x09,0x02,0x00,0x00,0x66,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x11,0x02,0x00,0x00, -0x10,0x02,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x12,0x02,0x00,0x00,0x11,0x02,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x14,0x02,0x00,0x00,0x12,0x02,0x00,0x00, -0x06,0x02,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x15,0x02,0x00,0x00,0x14,0x02,0x00,0x00,0x50,0x00,0x05,0x00, -0x7c,0x00,0x00,0x00,0x16,0x02,0x00,0x00,0x0f,0x02,0x00,0x00, -0x15,0x02,0x00,0x00,0x8e,0x00,0x05,0x00,0x7c,0x00,0x00,0x00, -0x17,0x02,0x00,0x00,0x16,0x02,0x00,0x00,0xf8,0x01,0x00,0x00, -0x50,0x00,0x05,0x00,0x7c,0x00,0x00,0x00,0x18,0x02,0x00,0x00, -0xfa,0x01,0x00,0x00,0xfa,0x01,0x00,0x00,0x81,0x00,0x05,0x00, -0x7c,0x00,0x00,0x00,0x19,0x02,0x00,0x00,0x17,0x02,0x00,0x00, -0x18,0x02,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x1f,0x02,0x00,0x00,0xa2,0x00,0x00,0x00,0x53,0x04,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x21,0x02,0x00,0x00, -0x19,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0x22,0x02,0x00,0x00,0x9b,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x1f,0x02,0x00,0x00,0x3e,0x00,0x03,0x00, -0x22,0x02,0x00,0x00,0x21,0x02,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x29,0x02,0x00,0x00,0xa2,0x00,0x00,0x00, -0x54,0x04,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0x2a,0x02,0x00,0x00,0x19,0x02,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0x2b,0x02,0x00,0x00, -0x9b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x29,0x02,0x00,0x00, -0x3e,0x00,0x03,0x00,0x2b,0x02,0x00,0x00,0x2a,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x34,0x02,0x00,0x00, -0x57,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x36,0x02,0x00,0x00,0x5b,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x38,0x02,0x00,0x00,0x62,0x00,0x00,0x00, 
-0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x39,0x02,0x00,0x00, -0x38,0x02,0x00,0x00,0x55,0x04,0x00,0x00,0xc4,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x3a,0x02,0x00,0x00,0x39,0x02,0x00,0x00, -0x66,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x3b,0x02,0x00,0x00,0x3a,0x02,0x00,0x00,0x4e,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x3c,0x02,0x00,0x00, -0x3b,0x02,0x00,0x00,0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x3e,0x02,0x00,0x00,0x62,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x40,0x02,0x00,0x00,0x3e,0x02,0x00,0x00, -0x4f,0x04,0x00,0x00,0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x41,0x02,0x00,0x00,0x40,0x02,0x00,0x00,0x4e,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x42,0x02,0x00,0x00, -0x41,0x02,0x00,0x00,0x41,0x00,0x08,0x00,0x79,0x00,0x00,0x00, -0x44,0x02,0x00,0x00,0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x55,0x04,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0x45,0x02,0x00,0x00, -0x44,0x02,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x46,0x02,0x00,0x00,0x45,0x02,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x47,0x02,0x00,0x00,0x46,0x02,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x48,0x02,0x00,0x00, -0x47,0x02,0x00,0x00,0x82,0x00,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x4a,0x02,0x00,0x00,0x48,0x02,0x00,0x00, -0x3c,0x02,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x4b,0x02,0x00,0x00,0x4a,0x02,0x00,0x00,0xc2,0x00,0x05,0x00, -0x4d,0x00,0x00,0x00,0x4c,0x02,0x00,0x00,0x45,0x02,0x00,0x00, -0x66,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x4d,0x02,0x00,0x00,0x4c,0x02,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x4e,0x02,0x00,0x00,0x4d,0x02,0x00,0x00, -0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x50,0x02,0x00,0x00, -0x4e,0x02,0x00,0x00,0x42,0x02,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x51,0x02,0x00,0x00,0x50,0x02,0x00,0x00, -0x50,0x00,0x05,0x00,0x7c,0x00,0x00,0x00,0x52,0x02,0x00,0x00, 
-0x4b,0x02,0x00,0x00,0x51,0x02,0x00,0x00,0x8e,0x00,0x05,0x00, -0x7c,0x00,0x00,0x00,0x53,0x02,0x00,0x00,0x52,0x02,0x00,0x00, -0x34,0x02,0x00,0x00,0x50,0x00,0x05,0x00,0x7c,0x00,0x00,0x00, -0x54,0x02,0x00,0x00,0x36,0x02,0x00,0x00,0x36,0x02,0x00,0x00, -0x81,0x00,0x05,0x00,0x7c,0x00,0x00,0x00,0x55,0x02,0x00,0x00, -0x53,0x02,0x00,0x00,0x54,0x02,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x5b,0x02,0x00,0x00,0xa2,0x00,0x00,0x00, -0x55,0x04,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0x5d,0x02,0x00,0x00,0x55,0x02,0x00,0x00,0x00,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0x5e,0x02,0x00,0x00, -0x9b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x5b,0x02,0x00,0x00, -0x3e,0x00,0x03,0x00,0x5e,0x02,0x00,0x00,0x5d,0x02,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x65,0x02,0x00,0x00, -0xa2,0x00,0x00,0x00,0x56,0x04,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x66,0x02,0x00,0x00,0x55,0x02,0x00,0x00, -0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0x67,0x02,0x00,0x00,0x9b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x65,0x02,0x00,0x00,0x3e,0x00,0x03,0x00,0x67,0x02,0x00,0x00, -0x66,0x02,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x70,0x02,0x00,0x00,0x57,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x72,0x02,0x00,0x00,0x5b,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x74,0x02,0x00,0x00, -0x62,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x75,0x02,0x00,0x00,0x74,0x02,0x00,0x00,0x57,0x04,0x00,0x00, -0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x76,0x02,0x00,0x00, -0x75,0x02,0x00,0x00,0x66,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x77,0x02,0x00,0x00,0x76,0x02,0x00,0x00, -0x4e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x78,0x02,0x00,0x00,0x77,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x7a,0x02,0x00,0x00,0x62,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x7c,0x02,0x00,0x00, -0x7a,0x02,0x00,0x00,0x50,0x04,0x00,0x00,0xc7,0x00,0x05,0x00, 
-0x09,0x00,0x00,0x00,0x7d,0x02,0x00,0x00,0x7c,0x02,0x00,0x00, -0x4e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x7e,0x02,0x00,0x00,0x7d,0x02,0x00,0x00,0x41,0x00,0x08,0x00, -0x79,0x00,0x00,0x00,0x80,0x02,0x00,0x00,0x54,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x77,0x00,0x00,0x00, -0x57,0x04,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0x81,0x02,0x00,0x00,0x80,0x02,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x82,0x02,0x00,0x00,0x81,0x02,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x83,0x02,0x00,0x00, -0x82,0x02,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x84,0x02,0x00,0x00,0x83,0x02,0x00,0x00,0x82,0x00,0x00,0x00, -0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x86,0x02,0x00,0x00, -0x84,0x02,0x00,0x00,0x78,0x02,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x87,0x02,0x00,0x00,0x86,0x02,0x00,0x00, -0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00,0x88,0x02,0x00,0x00, -0x81,0x02,0x00,0x00,0x66,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x89,0x02,0x00,0x00,0x88,0x02,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x8a,0x02,0x00,0x00, -0x89,0x02,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x8c,0x02,0x00,0x00,0x8a,0x02,0x00,0x00,0x7e,0x02,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x8d,0x02,0x00,0x00, -0x8c,0x02,0x00,0x00,0x50,0x00,0x05,0x00,0x7c,0x00,0x00,0x00, -0x8e,0x02,0x00,0x00,0x87,0x02,0x00,0x00,0x8d,0x02,0x00,0x00, -0x8e,0x00,0x05,0x00,0x7c,0x00,0x00,0x00,0x8f,0x02,0x00,0x00, -0x8e,0x02,0x00,0x00,0x70,0x02,0x00,0x00,0x50,0x00,0x05,0x00, -0x7c,0x00,0x00,0x00,0x90,0x02,0x00,0x00,0x72,0x02,0x00,0x00, -0x72,0x02,0x00,0x00,0x81,0x00,0x05,0x00,0x7c,0x00,0x00,0x00, -0x91,0x02,0x00,0x00,0x8f,0x02,0x00,0x00,0x90,0x02,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x97,0x02,0x00,0x00, -0xa2,0x00,0x00,0x00,0x57,0x04,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x99,0x02,0x00,0x00,0x91,0x02,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, 
-0x9a,0x02,0x00,0x00,0x9b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x97,0x02,0x00,0x00,0x3e,0x00,0x03,0x00,0x9a,0x02,0x00,0x00, -0x99,0x02,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xa1,0x02,0x00,0x00,0xa2,0x00,0x00,0x00,0x58,0x04,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0xa2,0x02,0x00,0x00, -0x91,0x02,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0xa3,0x02,0x00,0x00,0x9b,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xa1,0x02,0x00,0x00,0x3e,0x00,0x03,0x00, -0xa3,0x02,0x00,0x00,0xa2,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xac,0x02,0x00,0x00,0x57,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xae,0x02,0x00,0x00, -0x5b,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0xb0,0x02,0x00,0x00,0x62,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0xb1,0x02,0x00,0x00,0xb0,0x02,0x00,0x00, -0x59,0x04,0x00,0x00,0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0xb2,0x02,0x00,0x00,0xb1,0x02,0x00,0x00,0x66,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0xb3,0x02,0x00,0x00, -0xb2,0x02,0x00,0x00,0x4e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xb4,0x02,0x00,0x00,0xb3,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0xb6,0x02,0x00,0x00, -0x62,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0xb8,0x02,0x00,0x00,0xb6,0x02,0x00,0x00,0x52,0x04,0x00,0x00, -0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0xb9,0x02,0x00,0x00, -0xb8,0x02,0x00,0x00,0x4e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xba,0x02,0x00,0x00,0xb9,0x02,0x00,0x00, -0x41,0x00,0x08,0x00,0x79,0x00,0x00,0x00,0xbc,0x02,0x00,0x00, -0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x77,0x00,0x00,0x00,0x59,0x04,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0xbd,0x02,0x00,0x00,0xbc,0x02,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0xbe,0x02,0x00,0x00, -0xbd,0x02,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xbf,0x02,0x00,0x00,0xbe,0x02,0x00,0x00,0xc7,0x00,0x05,0x00, 
-0x06,0x00,0x00,0x00,0xc0,0x02,0x00,0x00,0xbf,0x02,0x00,0x00, -0x82,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xc2,0x02,0x00,0x00,0xc0,0x02,0x00,0x00,0xb4,0x02,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xc3,0x02,0x00,0x00, -0xc2,0x02,0x00,0x00,0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00, -0xc4,0x02,0x00,0x00,0xbd,0x02,0x00,0x00,0x66,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0xc5,0x02,0x00,0x00, -0xc4,0x02,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xc6,0x02,0x00,0x00,0xc5,0x02,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xc8,0x02,0x00,0x00,0xc6,0x02,0x00,0x00, -0xba,0x02,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xc9,0x02,0x00,0x00,0xc8,0x02,0x00,0x00,0x50,0x00,0x05,0x00, -0x7c,0x00,0x00,0x00,0xca,0x02,0x00,0x00,0xc3,0x02,0x00,0x00, -0xc9,0x02,0x00,0x00,0x8e,0x00,0x05,0x00,0x7c,0x00,0x00,0x00, -0xcb,0x02,0x00,0x00,0xca,0x02,0x00,0x00,0xac,0x02,0x00,0x00, -0x50,0x00,0x05,0x00,0x7c,0x00,0x00,0x00,0xcc,0x02,0x00,0x00, -0xae,0x02,0x00,0x00,0xae,0x02,0x00,0x00,0x81,0x00,0x05,0x00, -0x7c,0x00,0x00,0x00,0xcd,0x02,0x00,0x00,0xcb,0x02,0x00,0x00, -0xcc,0x02,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xd3,0x02,0x00,0x00,0xa2,0x00,0x00,0x00,0x59,0x04,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0xd5,0x02,0x00,0x00, -0xcd,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0xd6,0x02,0x00,0x00,0x9b,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xd3,0x02,0x00,0x00,0x3e,0x00,0x03,0x00, -0xd6,0x02,0x00,0x00,0xd5,0x02,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xdd,0x02,0x00,0x00,0xa2,0x00,0x00,0x00, -0x5a,0x04,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0xde,0x02,0x00,0x00,0xcd,0x02,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0xdf,0x02,0x00,0x00, -0x9b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xdd,0x02,0x00,0x00, -0x3e,0x00,0x03,0x00,0xdf,0x02,0x00,0x00,0xde,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xe8,0x02,0x00,0x00, 
-0x57,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xea,0x02,0x00,0x00,0x5b,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0xec,0x02,0x00,0x00,0x62,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0xed,0x02,0x00,0x00, -0xec,0x02,0x00,0x00,0x5b,0x04,0x00,0x00,0xc4,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0xee,0x02,0x00,0x00,0xed,0x02,0x00,0x00, -0x66,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0xef,0x02,0x00,0x00,0xee,0x02,0x00,0x00,0x4e,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xf0,0x02,0x00,0x00, -0xef,0x02,0x00,0x00,0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0xf2,0x02,0x00,0x00,0x62,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0xf4,0x02,0x00,0x00,0xf2,0x02,0x00,0x00, -0x54,0x04,0x00,0x00,0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0xf5,0x02,0x00,0x00,0xf4,0x02,0x00,0x00,0x4e,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xf6,0x02,0x00,0x00, -0xf5,0x02,0x00,0x00,0x41,0x00,0x08,0x00,0x79,0x00,0x00,0x00, -0xf8,0x02,0x00,0x00,0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x5b,0x04,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0xf9,0x02,0x00,0x00, -0xf8,0x02,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0xfa,0x02,0x00,0x00,0xf9,0x02,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xfb,0x02,0x00,0x00,0xfa,0x02,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xfc,0x02,0x00,0x00, -0xfb,0x02,0x00,0x00,0x82,0x00,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xfe,0x02,0x00,0x00,0xfc,0x02,0x00,0x00, -0xf0,0x02,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xff,0x02,0x00,0x00,0xfe,0x02,0x00,0x00,0xc2,0x00,0x05,0x00, -0x4d,0x00,0x00,0x00,0x00,0x03,0x00,0x00,0xf9,0x02,0x00,0x00, -0x66,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x01,0x03,0x00,0x00,0x00,0x03,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x02,0x03,0x00,0x00,0x01,0x03,0x00,0x00, -0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x04,0x03,0x00,0x00, 
-0x02,0x03,0x00,0x00,0xf6,0x02,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x05,0x03,0x00,0x00,0x04,0x03,0x00,0x00, -0x50,0x00,0x05,0x00,0x7c,0x00,0x00,0x00,0x06,0x03,0x00,0x00, -0xff,0x02,0x00,0x00,0x05,0x03,0x00,0x00,0x8e,0x00,0x05,0x00, -0x7c,0x00,0x00,0x00,0x07,0x03,0x00,0x00,0x06,0x03,0x00,0x00, -0xe8,0x02,0x00,0x00,0x50,0x00,0x05,0x00,0x7c,0x00,0x00,0x00, -0x08,0x03,0x00,0x00,0xea,0x02,0x00,0x00,0xea,0x02,0x00,0x00, -0x81,0x00,0x05,0x00,0x7c,0x00,0x00,0x00,0x09,0x03,0x00,0x00, -0x07,0x03,0x00,0x00,0x08,0x03,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x0f,0x03,0x00,0x00,0xa2,0x00,0x00,0x00, -0x5b,0x04,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0x11,0x03,0x00,0x00,0x09,0x03,0x00,0x00,0x00,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0x12,0x03,0x00,0x00, -0x9b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x0f,0x03,0x00,0x00, -0x3e,0x00,0x03,0x00,0x12,0x03,0x00,0x00,0x11,0x03,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x19,0x03,0x00,0x00, -0xa2,0x00,0x00,0x00,0x5c,0x04,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x1a,0x03,0x00,0x00,0x09,0x03,0x00,0x00, -0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0x1b,0x03,0x00,0x00,0x9b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x19,0x03,0x00,0x00,0x3e,0x00,0x03,0x00,0x1b,0x03,0x00,0x00, -0x1a,0x03,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x24,0x03,0x00,0x00,0x57,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x26,0x03,0x00,0x00,0x5b,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x28,0x03,0x00,0x00, -0x62,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x29,0x03,0x00,0x00,0x28,0x03,0x00,0x00,0x5d,0x04,0x00,0x00, -0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x2a,0x03,0x00,0x00, -0x29,0x03,0x00,0x00,0x66,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x2b,0x03,0x00,0x00,0x2a,0x03,0x00,0x00, -0x4e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x2c,0x03,0x00,0x00,0x2b,0x03,0x00,0x00,0x3d,0x00,0x04,0x00, 
-0x09,0x00,0x00,0x00,0x2e,0x03,0x00,0x00,0x62,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x30,0x03,0x00,0x00, -0x2e,0x03,0x00,0x00,0x56,0x04,0x00,0x00,0xc7,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x31,0x03,0x00,0x00,0x30,0x03,0x00,0x00, -0x4e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x32,0x03,0x00,0x00,0x31,0x03,0x00,0x00,0x41,0x00,0x08,0x00, -0x79,0x00,0x00,0x00,0x34,0x03,0x00,0x00,0x54,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x77,0x00,0x00,0x00, -0x5d,0x04,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0x35,0x03,0x00,0x00,0x34,0x03,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x36,0x03,0x00,0x00,0x35,0x03,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x37,0x03,0x00,0x00, -0x36,0x03,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x38,0x03,0x00,0x00,0x37,0x03,0x00,0x00,0x82,0x00,0x00,0x00, -0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3a,0x03,0x00,0x00, -0x38,0x03,0x00,0x00,0x2c,0x03,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x3b,0x03,0x00,0x00,0x3a,0x03,0x00,0x00, -0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00,0x3c,0x03,0x00,0x00, -0x35,0x03,0x00,0x00,0x66,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x3d,0x03,0x00,0x00,0x3c,0x03,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x3e,0x03,0x00,0x00, -0x3d,0x03,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x40,0x03,0x00,0x00,0x3e,0x03,0x00,0x00,0x32,0x03,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x41,0x03,0x00,0x00, -0x40,0x03,0x00,0x00,0x50,0x00,0x05,0x00,0x7c,0x00,0x00,0x00, -0x42,0x03,0x00,0x00,0x3b,0x03,0x00,0x00,0x41,0x03,0x00,0x00, -0x8e,0x00,0x05,0x00,0x7c,0x00,0x00,0x00,0x43,0x03,0x00,0x00, -0x42,0x03,0x00,0x00,0x24,0x03,0x00,0x00,0x50,0x00,0x05,0x00, -0x7c,0x00,0x00,0x00,0x44,0x03,0x00,0x00,0x26,0x03,0x00,0x00, -0x26,0x03,0x00,0x00,0x81,0x00,0x05,0x00,0x7c,0x00,0x00,0x00, -0x45,0x03,0x00,0x00,0x43,0x03,0x00,0x00,0x44,0x03,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4b,0x03,0x00,0x00, 
-0xa2,0x00,0x00,0x00,0x5d,0x04,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x4d,0x03,0x00,0x00,0x45,0x03,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0x4e,0x03,0x00,0x00,0x9b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x4b,0x03,0x00,0x00,0x3e,0x00,0x03,0x00,0x4e,0x03,0x00,0x00, -0x4d,0x03,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x55,0x03,0x00,0x00,0xa2,0x00,0x00,0x00,0x5e,0x04,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x56,0x03,0x00,0x00, -0x45,0x03,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0x57,0x03,0x00,0x00,0x9b,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x55,0x03,0x00,0x00,0x3e,0x00,0x03,0x00, -0x57,0x03,0x00,0x00,0x56,0x03,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x60,0x03,0x00,0x00,0x57,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x62,0x03,0x00,0x00, -0x5b,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x64,0x03,0x00,0x00,0x62,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x65,0x03,0x00,0x00,0x64,0x03,0x00,0x00, -0x6e,0x00,0x00,0x00,0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x66,0x03,0x00,0x00,0x65,0x03,0x00,0x00,0x66,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x67,0x03,0x00,0x00, -0x66,0x03,0x00,0x00,0x4e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x68,0x03,0x00,0x00,0x67,0x03,0x00,0x00, -0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x6a,0x03,0x00,0x00, -0x62,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x6c,0x03,0x00,0x00,0x6a,0x03,0x00,0x00,0x58,0x04,0x00,0x00, -0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x6d,0x03,0x00,0x00, -0x6c,0x03,0x00,0x00,0x4e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x6e,0x03,0x00,0x00,0x6d,0x03,0x00,0x00, -0x41,0x00,0x08,0x00,0x79,0x00,0x00,0x00,0x70,0x03,0x00,0x00, -0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x77,0x00,0x00,0x00,0x6e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0x71,0x03,0x00,0x00,0x70,0x03,0x00,0x00, 
-0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x72,0x03,0x00,0x00, -0x71,0x03,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x73,0x03,0x00,0x00,0x72,0x03,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x74,0x03,0x00,0x00,0x73,0x03,0x00,0x00, -0x82,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x76,0x03,0x00,0x00,0x74,0x03,0x00,0x00,0x68,0x03,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x77,0x03,0x00,0x00, -0x76,0x03,0x00,0x00,0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00, -0x78,0x03,0x00,0x00,0x71,0x03,0x00,0x00,0x66,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x79,0x03,0x00,0x00, -0x78,0x03,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x7a,0x03,0x00,0x00,0x79,0x03,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x7c,0x03,0x00,0x00,0x7a,0x03,0x00,0x00, -0x6e,0x03,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x7d,0x03,0x00,0x00,0x7c,0x03,0x00,0x00,0x50,0x00,0x05,0x00, -0x7c,0x00,0x00,0x00,0x7e,0x03,0x00,0x00,0x77,0x03,0x00,0x00, -0x7d,0x03,0x00,0x00,0x8e,0x00,0x05,0x00,0x7c,0x00,0x00,0x00, -0x7f,0x03,0x00,0x00,0x7e,0x03,0x00,0x00,0x60,0x03,0x00,0x00, -0x50,0x00,0x05,0x00,0x7c,0x00,0x00,0x00,0x80,0x03,0x00,0x00, -0x62,0x03,0x00,0x00,0x62,0x03,0x00,0x00,0x81,0x00,0x05,0x00, -0x7c,0x00,0x00,0x00,0x81,0x03,0x00,0x00,0x7f,0x03,0x00,0x00, -0x80,0x03,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x87,0x03,0x00,0x00,0xa2,0x00,0x00,0x00,0x6e,0x00,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x89,0x03,0x00,0x00, -0x81,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0x8a,0x03,0x00,0x00,0x9b,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x87,0x03,0x00,0x00,0x3e,0x00,0x03,0x00, -0x8a,0x03,0x00,0x00,0x89,0x03,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x91,0x03,0x00,0x00,0xa2,0x00,0x00,0x00, -0x5f,0x04,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0x92,0x03,0x00,0x00,0x81,0x03,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0x93,0x03,0x00,0x00, 
-0x9b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x91,0x03,0x00,0x00, -0x3e,0x00,0x03,0x00,0x93,0x03,0x00,0x00,0x92,0x03,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x9c,0x03,0x00,0x00, -0x57,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x9e,0x03,0x00,0x00,0x5b,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0xa0,0x03,0x00,0x00,0x62,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0xa1,0x03,0x00,0x00, -0xa0,0x03,0x00,0x00,0x4b,0x04,0x00,0x00,0xc4,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0xa2,0x03,0x00,0x00,0xa1,0x03,0x00,0x00, -0x66,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0xa3,0x03,0x00,0x00,0xa2,0x03,0x00,0x00,0x4e,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xa4,0x03,0x00,0x00, -0xa3,0x03,0x00,0x00,0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0xa6,0x03,0x00,0x00,0x62,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0xa8,0x03,0x00,0x00,0xa6,0x03,0x00,0x00, -0x5a,0x04,0x00,0x00,0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0xa9,0x03,0x00,0x00,0xa8,0x03,0x00,0x00,0x4e,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xaa,0x03,0x00,0x00, -0xa9,0x03,0x00,0x00,0x41,0x00,0x08,0x00,0x79,0x00,0x00,0x00, -0xac,0x03,0x00,0x00,0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x4b,0x04,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0xad,0x03,0x00,0x00, -0xac,0x03,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0xae,0x03,0x00,0x00,0xad,0x03,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xaf,0x03,0x00,0x00,0xae,0x03,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xb0,0x03,0x00,0x00, -0xaf,0x03,0x00,0x00,0x82,0x00,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xb2,0x03,0x00,0x00,0xb0,0x03,0x00,0x00, -0xa4,0x03,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xb3,0x03,0x00,0x00,0xb2,0x03,0x00,0x00,0xc2,0x00,0x05,0x00, -0x4d,0x00,0x00,0x00,0xb4,0x03,0x00,0x00,0xad,0x03,0x00,0x00, -0x66,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00, 
-0xb5,0x03,0x00,0x00,0xb4,0x03,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xb6,0x03,0x00,0x00,0xb5,0x03,0x00,0x00, -0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xb8,0x03,0x00,0x00, -0xb6,0x03,0x00,0x00,0xaa,0x03,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xb9,0x03,0x00,0x00,0xb8,0x03,0x00,0x00, -0x50,0x00,0x05,0x00,0x7c,0x00,0x00,0x00,0xba,0x03,0x00,0x00, -0xb3,0x03,0x00,0x00,0xb9,0x03,0x00,0x00,0x8e,0x00,0x05,0x00, -0x7c,0x00,0x00,0x00,0xbb,0x03,0x00,0x00,0xba,0x03,0x00,0x00, -0x9c,0x03,0x00,0x00,0x50,0x00,0x05,0x00,0x7c,0x00,0x00,0x00, -0xbc,0x03,0x00,0x00,0x9e,0x03,0x00,0x00,0x9e,0x03,0x00,0x00, -0x81,0x00,0x05,0x00,0x7c,0x00,0x00,0x00,0xbd,0x03,0x00,0x00, -0xbb,0x03,0x00,0x00,0xbc,0x03,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xc3,0x03,0x00,0x00,0xa2,0x00,0x00,0x00, -0x4b,0x04,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0xc5,0x03,0x00,0x00,0xbd,0x03,0x00,0x00,0x00,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0xc6,0x03,0x00,0x00, -0x9b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xc3,0x03,0x00,0x00, -0x3e,0x00,0x03,0x00,0xc6,0x03,0x00,0x00,0xc5,0x03,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xcd,0x03,0x00,0x00, -0xa2,0x00,0x00,0x00,0x60,0x04,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0xce,0x03,0x00,0x00,0xbd,0x03,0x00,0x00, -0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0xcf,0x03,0x00,0x00,0x9b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xcd,0x03,0x00,0x00,0x3e,0x00,0x03,0x00,0xcf,0x03,0x00,0x00, -0xce,0x03,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xd8,0x03,0x00,0x00,0x57,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xda,0x03,0x00,0x00,0x5b,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0xdc,0x03,0x00,0x00, -0x62,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0xdd,0x03,0x00,0x00,0xdc,0x03,0x00,0x00,0x4d,0x04,0x00,0x00, -0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0xde,0x03,0x00,0x00, -0xdd,0x03,0x00,0x00,0x66,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, 
-0x09,0x00,0x00,0x00,0xdf,0x03,0x00,0x00,0xde,0x03,0x00,0x00, -0x4e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xe0,0x03,0x00,0x00,0xdf,0x03,0x00,0x00,0x3d,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0xe2,0x03,0x00,0x00,0x62,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0xe4,0x03,0x00,0x00, -0xe2,0x03,0x00,0x00,0x5c,0x04,0x00,0x00,0xc7,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0xe5,0x03,0x00,0x00,0xe4,0x03,0x00,0x00, -0x4e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xe6,0x03,0x00,0x00,0xe5,0x03,0x00,0x00,0x41,0x00,0x08,0x00, -0x79,0x00,0x00,0x00,0xe8,0x03,0x00,0x00,0x54,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x77,0x00,0x00,0x00, -0x4d,0x04,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0xe9,0x03,0x00,0x00,0xe8,0x03,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0xea,0x03,0x00,0x00,0xe9,0x03,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xeb,0x03,0x00,0x00, -0xea,0x03,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xec,0x03,0x00,0x00,0xeb,0x03,0x00,0x00,0x82,0x00,0x00,0x00, -0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xee,0x03,0x00,0x00, -0xec,0x03,0x00,0x00,0xe0,0x03,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xef,0x03,0x00,0x00,0xee,0x03,0x00,0x00, -0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00,0xf0,0x03,0x00,0x00, -0xe9,0x03,0x00,0x00,0x66,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0xf1,0x03,0x00,0x00,0xf0,0x03,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xf2,0x03,0x00,0x00, -0xf1,0x03,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xf4,0x03,0x00,0x00,0xf2,0x03,0x00,0x00,0xe6,0x03,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xf5,0x03,0x00,0x00, -0xf4,0x03,0x00,0x00,0x50,0x00,0x05,0x00,0x7c,0x00,0x00,0x00, -0xf6,0x03,0x00,0x00,0xef,0x03,0x00,0x00,0xf5,0x03,0x00,0x00, -0x8e,0x00,0x05,0x00,0x7c,0x00,0x00,0x00,0xf7,0x03,0x00,0x00, -0xf6,0x03,0x00,0x00,0xd8,0x03,0x00,0x00,0x50,0x00,0x05,0x00, -0x7c,0x00,0x00,0x00,0xf8,0x03,0x00,0x00,0xda,0x03,0x00,0x00, 
-0xda,0x03,0x00,0x00,0x81,0x00,0x05,0x00,0x7c,0x00,0x00,0x00, -0xf9,0x03,0x00,0x00,0xf7,0x03,0x00,0x00,0xf8,0x03,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xff,0x03,0x00,0x00, -0xa2,0x00,0x00,0x00,0x4d,0x04,0x00,0x00,0x51,0x00,0x05,0x00, -0x4a,0x00,0x00,0x00,0x01,0x04,0x00,0x00,0xf9,0x03,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0x02,0x04,0x00,0x00,0x9b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xff,0x03,0x00,0x00,0x3e,0x00,0x03,0x00,0x02,0x04,0x00,0x00, -0x01,0x04,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x09,0x04,0x00,0x00,0xa2,0x00,0x00,0x00,0x61,0x04,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x0a,0x04,0x00,0x00, -0xf9,0x03,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0x0b,0x04,0x00,0x00,0x9b,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x09,0x04,0x00,0x00,0x3e,0x00,0x03,0x00, -0x0b,0x04,0x00,0x00,0x0a,0x04,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x14,0x04,0x00,0x00,0x57,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x16,0x04,0x00,0x00, -0x5b,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x18,0x04,0x00,0x00,0x62,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x19,0x04,0x00,0x00,0x18,0x04,0x00,0x00, -0x82,0x00,0x00,0x00,0xc4,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x1a,0x04,0x00,0x00,0x19,0x04,0x00,0x00,0x66,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x1b,0x04,0x00,0x00, -0x1a,0x04,0x00,0x00,0x4e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x1c,0x04,0x00,0x00,0x1b,0x04,0x00,0x00, -0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x1e,0x04,0x00,0x00, -0x62,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x09,0x00,0x00,0x00, -0x20,0x04,0x00,0x00,0x1e,0x04,0x00,0x00,0x5e,0x04,0x00,0x00, -0xc7,0x00,0x05,0x00,0x09,0x00,0x00,0x00,0x21,0x04,0x00,0x00, -0x20,0x04,0x00,0x00,0x4e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x22,0x04,0x00,0x00,0x21,0x04,0x00,0x00, -0x41,0x00,0x08,0x00,0x79,0x00,0x00,0x00,0x24,0x04,0x00,0x00, 
-0x54,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x77,0x00,0x00,0x00,0x82,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4d,0x00,0x00,0x00,0x25,0x04,0x00,0x00,0x24,0x04,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x26,0x04,0x00,0x00, -0x25,0x04,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x27,0x04,0x00,0x00,0x26,0x04,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x28,0x04,0x00,0x00,0x27,0x04,0x00,0x00, -0x82,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x2a,0x04,0x00,0x00,0x28,0x04,0x00,0x00,0x1c,0x04,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x2b,0x04,0x00,0x00, -0x2a,0x04,0x00,0x00,0xc2,0x00,0x05,0x00,0x4d,0x00,0x00,0x00, -0x2c,0x04,0x00,0x00,0x25,0x04,0x00,0x00,0x66,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x2d,0x04,0x00,0x00, -0x2c,0x04,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x2e,0x04,0x00,0x00,0x2d,0x04,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x30,0x04,0x00,0x00,0x2e,0x04,0x00,0x00, -0x22,0x04,0x00,0x00,0x6f,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x31,0x04,0x00,0x00,0x30,0x04,0x00,0x00,0x50,0x00,0x05,0x00, -0x7c,0x00,0x00,0x00,0x32,0x04,0x00,0x00,0x2b,0x04,0x00,0x00, -0x31,0x04,0x00,0x00,0x8e,0x00,0x05,0x00,0x7c,0x00,0x00,0x00, -0x33,0x04,0x00,0x00,0x32,0x04,0x00,0x00,0x14,0x04,0x00,0x00, -0x50,0x00,0x05,0x00,0x7c,0x00,0x00,0x00,0x34,0x04,0x00,0x00, -0x16,0x04,0x00,0x00,0x16,0x04,0x00,0x00,0x81,0x00,0x05,0x00, -0x7c,0x00,0x00,0x00,0x35,0x04,0x00,0x00,0x33,0x04,0x00,0x00, -0x34,0x04,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x3b,0x04,0x00,0x00,0xa2,0x00,0x00,0x00,0x82,0x00,0x00,0x00, -0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00,0x3d,0x04,0x00,0x00, -0x35,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x56,0x00,0x00,0x00,0x3e,0x04,0x00,0x00,0x9b,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x3b,0x04,0x00,0x00,0x3e,0x00,0x03,0x00, -0x3e,0x04,0x00,0x00,0x3d,0x04,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x45,0x04,0x00,0x00,0xa2,0x00,0x00,0x00, 
-0x62,0x04,0x00,0x00,0x51,0x00,0x05,0x00,0x4a,0x00,0x00,0x00, -0x46,0x04,0x00,0x00,0x35,0x04,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0x47,0x04,0x00,0x00, -0x9b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x45,0x04,0x00,0x00, -0x3e,0x00,0x03,0x00,0x47,0x04,0x00,0x00,0x46,0x04,0x00,0x00, -0xf9,0x00,0x02,0x00,0xba,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0xba,0x00,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, - -}; -const uint64_t dequant_q5_1_len = 12768; - -unsigned char dequant_q5_1_fp32_data[] = { -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, 0x95,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, 0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00,0x0b,0x00,0x06,0x00, @@ -11163,505 +6149,10 @@ unsigned char dequant_q5_1_fp32_data[] = { 0xbe,0x00,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, }; -const uint64_t dequant_q5_1_fp32_len = 13548; +const uint64_t dequant_q5_1_len = 13548; unsigned char dequant_q5_K_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0x99,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, -0x11,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x51,0x11,0x00,0x00,0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00, -0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, -0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, -0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x0f,0x00,0x0a,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x25,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x51,0x00,0x00,0x00, -0x12,0x01,0x00,0x00,0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x17,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x48,0x00,0x05,0x00, 
-0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x23,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x23,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x23,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x33,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x48,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x4c,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x4d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x4d,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x4d,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x4d,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x30,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0xb0,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x4f,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x4f,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x4f,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x51,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x51,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x0f,0x01,0x00,0x00, -0x06,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x10,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x10,0x01,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, 
-0x10,0x01,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x12,0x01,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x12,0x01,0x00,0x00,0x21,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x82,0x01,0x00,0x00, -0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00, -0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x00,0x01,0x00,0x00,0x14,0x00,0x02,0x00,0x11,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x15,0x00,0x00,0x00, -0x14,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x16,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x15,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x16,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x19,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x00,0x01,0x00,0x00,0x1e,0x00,0x06,0x00,0x23,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x24,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x24,0x00,0x00,0x00,0x25,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x26,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x16,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x39,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x3f,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x16,0x00,0x03,0x00, 
-0x42,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x17,0x00,0x04,0x00, -0x45,0x00,0x00,0x00,0x42,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x46,0x00,0x00,0x00,0x08,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0x47,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x1c,0x00,0x04,0x00, -0x48,0x00,0x00,0x00,0x46,0x00,0x00,0x00,0x47,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x49,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x1c,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x46,0x00,0x00,0x00,0x49,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0x4b,0x00,0x00,0x00,0x80,0x00,0x00,0x00, -0x1c,0x00,0x04,0x00,0x4c,0x00,0x00,0x00,0x46,0x00,0x00,0x00, -0x4b,0x00,0x00,0x00,0x1e,0x00,0x06,0x00,0x4d,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0x4c,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x4e,0x00,0x00,0x00, -0x4d,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x4f,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x50,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x50,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x42,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0x58,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x5e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x66,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x70,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x78,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x46,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x7d,0x00,0x00,0x00, -0x3f,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x7f,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x95,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x9c,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, 
-0xcb,0x00,0x00,0x00,0x05,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xdf,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x0f,0x01,0x00,0x00,0x42,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x10,0x01,0x00,0x00,0x0f,0x01,0x00,0x00, -0x20,0x00,0x04,0x00,0x11,0x01,0x00,0x00,0x0c,0x00,0x00,0x00, -0x10,0x01,0x00,0x00,0x3b,0x00,0x04,0x00,0x11,0x01,0x00,0x00, -0x12,0x01,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x63,0x01,0x00,0x00,0x21,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x81,0x01,0x00,0x00, -0x40,0x00,0x00,0x00,0x2c,0x00,0x06,0x00,0x15,0x00,0x00,0x00, -0x82,0x01,0x00,0x00,0x81,0x01,0x00,0x00,0x58,0x00,0x00,0x00, -0x58,0x00,0x00,0x00,0x2a,0x00,0x03,0x00,0x11,0x00,0x00,0x00, -0x85,0x01,0x00,0x00,0x29,0x00,0x03,0x00,0x11,0x00,0x00,0x00, -0x88,0x01,0x00,0x00,0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, -0x83,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00, -0x18,0x00,0x00,0x00,0x84,0x01,0x00,0x00,0xf8,0x00,0x02,0x00, -0x84,0x01,0x00,0x00,0xf9,0x00,0x02,0x00,0x0a,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x0a,0x00,0x00,0x00,0xf5,0x00,0x07,0x00, -0x06,0x00,0x00,0x00,0x8b,0x01,0x00,0x00,0x09,0x00,0x00,0x00, -0x84,0x01,0x00,0x00,0x80,0x01,0x00,0x00,0x0d,0x00,0x00,0x00, -0xb1,0x00,0x05,0x00,0x11,0x00,0x00,0x00,0x12,0x00,0x00,0x00, -0x8b,0x01,0x00,0x00,0x10,0x00,0x00,0x00,0xf6,0x00,0x04,0x00, -0x0c,0x00,0x00,0x00,0x0d,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0xfa,0x00,0x04,0x00,0x12,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x0b,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x19,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x14,0x00,0x00,0x00,0x1d,0x00,0x00,0x00, -0x1b,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, 
-0x14,0x00,0x00,0x00,0x1f,0x00,0x00,0x00,0x8b,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x14,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x1d,0x00,0x00,0x00,0x1f,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x26,0x00,0x00,0x00,0x27,0x00,0x00,0x00, -0x25,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x28,0x00,0x00,0x00,0x27,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x26,0x00,0x00,0x00,0x2a,0x00,0x00,0x00, -0x25,0x00,0x00,0x00,0x29,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x2b,0x00,0x00,0x00,0x2a,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x2c,0x00,0x00,0x00, -0x28,0x00,0x00,0x00,0x2b,0x00,0x00,0x00,0x87,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x2c,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0xaf,0x00,0x05,0x00,0x11,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0x30,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfa,0x00,0x04,0x00,0x2e,0x00,0x00,0x00,0x2f,0x00,0x00,0x00, -0x30,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x2f,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0x0c,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x30,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x19,0x00,0x00,0x00, -0x34,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x35,0x00,0x00,0x00, -0x34,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x36,0x00,0x00,0x00,0x35,0x00,0x00,0x00,0x87,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x36,0x00,0x00,0x00, -0x39,0x00,0x00,0x00,0x8b,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x3d,0x00,0x00,0x00,0x36,0x00,0x00,0x00,0x39,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x3f,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x53,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x51,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x42,0x00,0x00,0x00, 
-0x55,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x53,0x00,0x00,0x00,0x59,0x00,0x00,0x00,0x51,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x58,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x42,0x00,0x00,0x00, -0x5a,0x00,0x00,0x00,0x59,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x5d,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x60,0x00,0x00,0x00,0x5e,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x61,0x00,0x00,0x00, -0x5d,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x63,0x00,0x00,0x00,0x3f,0x00,0x00,0x00, -0x3d,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x64,0x00,0x00,0x00,0x61,0x00,0x00,0x00,0x63,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x68,0x00,0x00,0x00, -0x66,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x6b,0x00,0x00,0x00,0x68,0x00,0x00,0x00, -0x63,0x00,0x00,0x00,0xb1,0x00,0x05,0x00,0x11,0x00,0x00,0x00, -0x71,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x70,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0x73,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfa,0x00,0x04,0x00,0x71,0x00,0x00,0x00,0x72,0x00,0x00,0x00, -0x8d,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x72,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x78,0x00,0x00,0x00,0x79,0x00,0x00,0x00, -0x51,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x46,0x00,0x00,0x00,0x7a,0x00,0x00,0x00,0x79,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x7b,0x00,0x00,0x00, -0x7a,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x7c,0x00,0x00,0x00,0x7b,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,0x7c,0x00,0x00,0x00, -0x7d,0x00,0x00,0x00,0x72,0x00,0x04,0x00,0x7f,0x00,0x00,0x00, -0x80,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x46,0x00,0x00,0x00,0x81,0x00,0x00,0x00,0x80,0x00,0x00,0x00, 
-0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x85,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x70,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x78,0x00,0x00,0x00,0x86,0x00,0x00,0x00,0x51,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x29,0x00,0x00,0x00, -0x85,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00, -0x87,0x00,0x00,0x00,0x86,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0x88,0x00,0x00,0x00,0x87,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x89,0x00,0x00,0x00, -0x88,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x8a,0x00,0x00,0x00,0x89,0x00,0x00,0x00,0x7d,0x00,0x00,0x00, -0x72,0x00,0x04,0x00,0x7f,0x00,0x00,0x00,0x8b,0x00,0x00,0x00, -0x8a,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x46,0x00,0x00,0x00, -0x8c,0x00,0x00,0x00,0x8b,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0x73,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x8d,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x90,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x70,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x78,0x00,0x00,0x00,0x91,0x00,0x00,0x00,0x51,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x29,0x00,0x00,0x00, -0x90,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00, -0x92,0x00,0x00,0x00,0x91,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0x93,0x00,0x00,0x00,0x92,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x94,0x00,0x00,0x00, -0x93,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x96,0x00,0x00,0x00,0x94,0x00,0x00,0x00,0x95,0x00,0x00,0x00, -0x82,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x99,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x70,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x78,0x00,0x00,0x00,0x9a,0x00,0x00,0x00,0x51,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x29,0x00,0x00,0x00, -0x99,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00, -0x9b,0x00,0x00,0x00,0x9a,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x46,0x00,0x00,0x00,0x9d,0x00,0x00,0x00,0x9b,0x00,0x00,0x00, -0x9c,0x00,0x00,0x00,0xc4,0x00,0x05,0x00,0x46,0x00,0x00,0x00, 
-0x9e,0x00,0x00,0x00,0x9d,0x00,0x00,0x00,0x70,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x9f,0x00,0x00,0x00, -0x9e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xa0,0x00,0x00,0x00,0x9f,0x00,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xa1,0x00,0x00,0x00,0x96,0x00,0x00,0x00, -0xa0,0x00,0x00,0x00,0x72,0x00,0x04,0x00,0x7f,0x00,0x00,0x00, -0xa2,0x00,0x00,0x00,0xa1,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x46,0x00,0x00,0x00,0xa3,0x00,0x00,0x00,0xa2,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00,0xa8,0x00,0x00,0x00, -0x91,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x46,0x00,0x00,0x00, -0xa9,0x00,0x00,0x00,0xa8,0x00,0x00,0x00,0x70,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x78,0x00,0x00,0x00,0xac,0x00,0x00,0x00, -0x51,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x46,0x00,0x00,0x00,0xad,0x00,0x00,0x00,0xac,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x46,0x00,0x00,0x00,0xae,0x00,0x00,0x00, -0xad,0x00,0x00,0x00,0x9c,0x00,0x00,0x00,0xc4,0x00,0x05,0x00, -0x46,0x00,0x00,0x00,0xaf,0x00,0x00,0x00,0xae,0x00,0x00,0x00, -0x70,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x46,0x00,0x00,0x00, -0xb0,0x00,0x00,0x00,0xa9,0x00,0x00,0x00,0xaf,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0x73,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x73,0x00,0x00,0x00,0xf5,0x00,0x07,0x00,0x46,0x00,0x00,0x00, -0x8d,0x01,0x00,0x00,0x8c,0x00,0x00,0x00,0x72,0x00,0x00,0x00, -0xb0,0x00,0x00,0x00,0x8d,0x00,0x00,0x00,0xf5,0x00,0x07,0x00, -0x46,0x00,0x00,0x00,0x8c,0x01,0x00,0x00,0x81,0x00,0x00,0x00, -0x72,0x00,0x00,0x00,0xa3,0x00,0x00,0x00,0x8d,0x00,0x00,0x00, -0x70,0x00,0x04,0x00,0x42,0x00,0x00,0x00,0xb4,0x00,0x00,0x00, -0x8c,0x01,0x00,0x00,0x85,0x00,0x05,0x00,0x42,0x00,0x00,0x00, -0xb5,0x00,0x00,0x00,0x55,0x00,0x00,0x00,0xb4,0x00,0x00,0x00, -0x70,0x00,0x04,0x00,0x42,0x00,0x00,0x00,0xb9,0x00,0x00,0x00, -0x8d,0x01,0x00,0x00,0x85,0x00,0x05,0x00,0x42,0x00,0x00,0x00, -0xba,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0xb9,0x00,0x00,0x00, 
-0xf7,0x00,0x03,0x00,0xbe,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfa,0x00,0x04,0x00,0x71,0x00,0x00,0x00,0xbd,0x00,0x00,0x00, -0xd4,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0xbd,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xc1,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x29,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x78,0x00,0x00,0x00,0xc2,0x00,0x00,0x00,0x51,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x29,0x00,0x00,0x00, -0xc1,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00, -0xc3,0x00,0x00,0x00,0xc2,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0xc4,0x00,0x00,0x00,0xc3,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xc5,0x00,0x00,0x00, -0xc4,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xc6,0x00,0x00,0x00,0xc5,0x00,0x00,0x00,0x7d,0x00,0x00,0x00, -0x72,0x00,0x04,0x00,0x7f,0x00,0x00,0x00,0xc7,0x00,0x00,0x00, -0xc6,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x46,0x00,0x00,0x00, -0xc8,0x00,0x00,0x00,0xc7,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xcc,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0xcb,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x78,0x00,0x00,0x00, -0xcd,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x29,0x00,0x00,0x00,0xcc,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00,0xce,0x00,0x00,0x00, -0xcd,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0xcf,0x00,0x00,0x00,0xce,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xd0,0x00,0x00,0x00,0xcf,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xd1,0x00,0x00,0x00, -0xd0,0x00,0x00,0x00,0x7d,0x00,0x00,0x00,0x72,0x00,0x04,0x00, -0x7f,0x00,0x00,0x00,0xd2,0x00,0x00,0x00,0xd1,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x46,0x00,0x00,0x00,0xd3,0x00,0x00,0x00, -0xd2,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0xbe,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0xd4,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xd7,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0xcb,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x78,0x00,0x00,0x00, 
-0xd8,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x29,0x00,0x00,0x00,0xd7,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00,0xd9,0x00,0x00,0x00, -0xd8,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0xda,0x00,0x00,0x00,0xd9,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xdb,0x00,0x00,0x00,0xda,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xdc,0x00,0x00,0x00, -0xdb,0x00,0x00,0x00,0x95,0x00,0x00,0x00,0x82,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xe0,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0xdf,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x78,0x00,0x00,0x00, -0xe1,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x29,0x00,0x00,0x00,0xe0,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00,0xe2,0x00,0x00,0x00, -0xe1,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x46,0x00,0x00,0x00, -0xe3,0x00,0x00,0x00,0xe2,0x00,0x00,0x00,0x9c,0x00,0x00,0x00, -0xc4,0x00,0x05,0x00,0x46,0x00,0x00,0x00,0xe4,0x00,0x00,0x00, -0xe3,0x00,0x00,0x00,0x70,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0xe5,0x00,0x00,0x00,0xe4,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xe6,0x00,0x00,0x00, -0xe5,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xe7,0x00,0x00,0x00,0xdc,0x00,0x00,0x00,0xe6,0x00,0x00,0x00, -0x72,0x00,0x04,0x00,0x7f,0x00,0x00,0x00,0xe8,0x00,0x00,0x00, -0xe7,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x46,0x00,0x00,0x00, -0xe9,0x00,0x00,0x00,0xe8,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x46,0x00,0x00,0x00,0xee,0x00,0x00,0x00,0xd8,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x46,0x00,0x00,0x00,0xef,0x00,0x00,0x00, -0xee,0x00,0x00,0x00,0x70,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xf2,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x78,0x00,0x00,0x00, -0xf3,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x29,0x00,0x00,0x00,0xf2,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00,0xf4,0x00,0x00,0x00, 
-0xf3,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x46,0x00,0x00,0x00, -0xf5,0x00,0x00,0x00,0xf4,0x00,0x00,0x00,0x9c,0x00,0x00,0x00, -0xc4,0x00,0x05,0x00,0x46,0x00,0x00,0x00,0xf6,0x00,0x00,0x00, -0xf5,0x00,0x00,0x00,0x70,0x00,0x00,0x00,0xc5,0x00,0x05,0x00, -0x46,0x00,0x00,0x00,0xf7,0x00,0x00,0x00,0xef,0x00,0x00,0x00, -0xf6,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0xbe,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0xbe,0x00,0x00,0x00,0xf5,0x00,0x07,0x00, -0x46,0x00,0x00,0x00,0x8f,0x01,0x00,0x00,0xd3,0x00,0x00,0x00, -0xbd,0x00,0x00,0x00,0xf7,0x00,0x00,0x00,0xd4,0x00,0x00,0x00, -0xf5,0x00,0x07,0x00,0x46,0x00,0x00,0x00,0x8e,0x01,0x00,0x00, -0xc8,0x00,0x00,0x00,0xbd,0x00,0x00,0x00,0xe9,0x00,0x00,0x00, -0xd4,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x42,0x00,0x00,0x00, -0xfb,0x00,0x00,0x00,0x8e,0x01,0x00,0x00,0x85,0x00,0x05,0x00, -0x42,0x00,0x00,0x00,0xfc,0x00,0x00,0x00,0x55,0x00,0x00,0x00, -0xfb,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x42,0x00,0x00,0x00, -0x00,0x01,0x00,0x00,0x8f,0x01,0x00,0x00,0x85,0x00,0x05,0x00, -0x42,0x00,0x00,0x00,0x01,0x01,0x00,0x00,0x5a,0x00,0x00,0x00, -0x00,0x01,0x00,0x00,0xc4,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x05,0x01,0x00,0x00,0x29,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x72,0x00,0x04,0x00,0x7f,0x00,0x00,0x00,0x06,0x01,0x00,0x00, -0x05,0x01,0x00,0x00,0x7c,0x00,0x04,0x00,0x46,0x00,0x00,0x00, -0x07,0x01,0x00,0x00,0x06,0x01,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x0b,0x01,0x00,0x00,0x41,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0xc4,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x0c,0x01,0x00,0x00,0x29,0x00,0x00,0x00,0x0b,0x01,0x00,0x00, -0x72,0x00,0x04,0x00,0x7f,0x00,0x00,0x00,0x0d,0x01,0x00,0x00, -0x0c,0x01,0x00,0x00,0x7c,0x00,0x04,0x00,0x46,0x00,0x00,0x00, -0x0e,0x01,0x00,0x00,0x0d,0x01,0x00,0x00,0x41,0x00,0x08,0x00, -0x78,0x00,0x00,0x00,0x17,0x01,0x00,0x00,0x51,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0xdf,0x00,0x00,0x00, -0x6b,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00, -0x18,0x01,0x00,0x00,0x17,0x01,0x00,0x00,0x71,0x00,0x04,0x00, 
-0x14,0x00,0x00,0x00,0x19,0x01,0x00,0x00,0x18,0x01,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x1a,0x01,0x00,0x00, -0x19,0x01,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x1b,0x01,0x00,0x00,0x1a,0x01,0x00,0x00,0x95,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x78,0x00,0x00,0x00,0x1e,0x01,0x00,0x00, -0x51,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x3f,0x00,0x00,0x00,0x63,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x46,0x00,0x00,0x00,0x1f,0x01,0x00,0x00,0x1e,0x01,0x00,0x00, -0xc7,0x00,0x05,0x00,0x46,0x00,0x00,0x00,0x21,0x01,0x00,0x00, -0x1f,0x01,0x00,0x00,0x07,0x01,0x00,0x00,0x71,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0x22,0x01,0x00,0x00,0x21,0x01,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x23,0x01,0x00,0x00, -0x22,0x01,0x00,0x00,0xab,0x00,0x05,0x00,0x11,0x00,0x00,0x00, -0x24,0x01,0x00,0x00,0x23,0x01,0x00,0x00,0x09,0x00,0x00,0x00, -0xa9,0x00,0x06,0x00,0x06,0x00,0x00,0x00,0x25,0x01,0x00,0x00, -0x24,0x01,0x00,0x00,0x39,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x26,0x01,0x00,0x00, -0x1b,0x01,0x00,0x00,0x25,0x01,0x00,0x00,0x6f,0x00,0x04,0x00, -0x42,0x00,0x00,0x00,0x27,0x01,0x00,0x00,0x26,0x01,0x00,0x00, -0x7f,0x00,0x04,0x00,0x42,0x00,0x00,0x00,0x96,0x01,0x00,0x00, -0xba,0x00,0x00,0x00,0x0c,0x00,0x08,0x00,0x42,0x00,0x00,0x00, -0x2a,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x32,0x00,0x00,0x00, -0xb5,0x00,0x00,0x00,0x27,0x01,0x00,0x00,0x96,0x01,0x00,0x00, -0x41,0x00,0x06,0x00,0x53,0x00,0x00,0x00,0x2b,0x01,0x00,0x00, -0x12,0x01,0x00,0x00,0x09,0x00,0x00,0x00,0x64,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0x2b,0x01,0x00,0x00,0x2a,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x2d,0x01,0x00,0x00, -0x64,0x00,0x00,0x00,0x29,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x31,0x01,0x00,0x00,0x6b,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x78,0x00,0x00,0x00, -0x32,0x01,0x00,0x00,0x51,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0xdf,0x00,0x00,0x00,0x31,0x01,0x00,0x00, 
-0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00,0x33,0x01,0x00,0x00, -0x32,0x01,0x00,0x00,0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0x34,0x01,0x00,0x00,0x33,0x01,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x35,0x01,0x00,0x00,0x34,0x01,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x36,0x01,0x00,0x00, -0x35,0x01,0x00,0x00,0x95,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x39,0x01,0x00,0x00,0x63,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x78,0x00,0x00,0x00, -0x3a,0x01,0x00,0x00,0x51,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x39,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00,0x3b,0x01,0x00,0x00, -0x3a,0x01,0x00,0x00,0xc7,0x00,0x05,0x00,0x46,0x00,0x00,0x00, -0x3d,0x01,0x00,0x00,0x3b,0x01,0x00,0x00,0x07,0x01,0x00,0x00, -0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x3e,0x01,0x00,0x00, -0x3d,0x01,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x3f,0x01,0x00,0x00,0x3e,0x01,0x00,0x00,0xab,0x00,0x05,0x00, -0x11,0x00,0x00,0x00,0x40,0x01,0x00,0x00,0x3f,0x01,0x00,0x00, -0x09,0x00,0x00,0x00,0xa9,0x00,0x06,0x00,0x06,0x00,0x00,0x00, -0x41,0x01,0x00,0x00,0x40,0x01,0x00,0x00,0x39,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x42,0x01,0x00,0x00,0x36,0x01,0x00,0x00,0x41,0x01,0x00,0x00, -0x6f,0x00,0x04,0x00,0x42,0x00,0x00,0x00,0x43,0x01,0x00,0x00, -0x42,0x01,0x00,0x00,0x0c,0x00,0x08,0x00,0x42,0x00,0x00,0x00, -0x46,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x32,0x00,0x00,0x00, -0xb5,0x00,0x00,0x00,0x43,0x01,0x00,0x00,0x96,0x01,0x00,0x00, -0x41,0x00,0x06,0x00,0x53,0x00,0x00,0x00,0x47,0x01,0x00,0x00, -0x12,0x01,0x00,0x00,0x09,0x00,0x00,0x00,0x2d,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0x47,0x01,0x00,0x00,0x46,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x49,0x01,0x00,0x00, -0x64,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x46,0x00,0x00,0x00,0x4e,0x01,0x00,0x00,0x17,0x01,0x00,0x00, -0xc2,0x00,0x05,0x00,0x46,0x00,0x00,0x00,0x4f,0x01,0x00,0x00, 
-0x4e,0x01,0x00,0x00,0x70,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0x50,0x01,0x00,0x00,0x4f,0x01,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x51,0x01,0x00,0x00, -0x50,0x01,0x00,0x00,0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00, -0x55,0x01,0x00,0x00,0x1e,0x01,0x00,0x00,0xc7,0x00,0x05,0x00, -0x46,0x00,0x00,0x00,0x57,0x01,0x00,0x00,0x55,0x01,0x00,0x00, -0x0e,0x01,0x00,0x00,0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0x58,0x01,0x00,0x00,0x57,0x01,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x59,0x01,0x00,0x00,0x58,0x01,0x00,0x00, -0xab,0x00,0x05,0x00,0x11,0x00,0x00,0x00,0x5a,0x01,0x00,0x00, -0x59,0x01,0x00,0x00,0x09,0x00,0x00,0x00,0xa9,0x00,0x06,0x00, -0x06,0x00,0x00,0x00,0x5b,0x01,0x00,0x00,0x5a,0x01,0x00,0x00, -0x39,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x5c,0x01,0x00,0x00,0x51,0x01,0x00,0x00, -0x5b,0x01,0x00,0x00,0x6f,0x00,0x04,0x00,0x42,0x00,0x00,0x00, -0x5d,0x01,0x00,0x00,0x5c,0x01,0x00,0x00,0x7f,0x00,0x04,0x00, -0x42,0x00,0x00,0x00,0x98,0x01,0x00,0x00,0x01,0x01,0x00,0x00, -0x0c,0x00,0x08,0x00,0x42,0x00,0x00,0x00,0x60,0x01,0x00,0x00, -0x01,0x00,0x00,0x00,0x32,0x00,0x00,0x00,0xfc,0x00,0x00,0x00, -0x5d,0x01,0x00,0x00,0x98,0x01,0x00,0x00,0x41,0x00,0x06,0x00, -0x53,0x00,0x00,0x00,0x61,0x01,0x00,0x00,0x12,0x01,0x00,0x00, -0x09,0x00,0x00,0x00,0x49,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0x61,0x01,0x00,0x00,0x60,0x01,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x64,0x01,0x00,0x00,0x64,0x00,0x00,0x00, -0x63,0x01,0x00,0x00,0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00, -0x6a,0x01,0x00,0x00,0x32,0x01,0x00,0x00,0xc2,0x00,0x05,0x00, -0x46,0x00,0x00,0x00,0x6b,0x01,0x00,0x00,0x6a,0x01,0x00,0x00, -0x70,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0x6c,0x01,0x00,0x00,0x6b,0x01,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x6d,0x01,0x00,0x00,0x6c,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x46,0x00,0x00,0x00,0x72,0x01,0x00,0x00, -0x3a,0x01,0x00,0x00,0xc7,0x00,0x05,0x00,0x46,0x00,0x00,0x00, 
-0x74,0x01,0x00,0x00,0x72,0x01,0x00,0x00,0x0e,0x01,0x00,0x00, -0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x75,0x01,0x00,0x00, -0x74,0x01,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x76,0x01,0x00,0x00,0x75,0x01,0x00,0x00,0xab,0x00,0x05,0x00, -0x11,0x00,0x00,0x00,0x77,0x01,0x00,0x00,0x76,0x01,0x00,0x00, -0x09,0x00,0x00,0x00,0xa9,0x00,0x06,0x00,0x06,0x00,0x00,0x00, -0x78,0x01,0x00,0x00,0x77,0x01,0x00,0x00,0x39,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x79,0x01,0x00,0x00,0x6d,0x01,0x00,0x00,0x78,0x01,0x00,0x00, -0x6f,0x00,0x04,0x00,0x42,0x00,0x00,0x00,0x7a,0x01,0x00,0x00, -0x79,0x01,0x00,0x00,0x0c,0x00,0x08,0x00,0x42,0x00,0x00,0x00, -0x7d,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x32,0x00,0x00,0x00, -0xfc,0x00,0x00,0x00,0x7a,0x01,0x00,0x00,0x98,0x01,0x00,0x00, -0x41,0x00,0x06,0x00,0x53,0x00,0x00,0x00,0x7e,0x01,0x00,0x00, -0x12,0x01,0x00,0x00,0x09,0x00,0x00,0x00,0x64,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0x7e,0x01,0x00,0x00,0x7d,0x01,0x00,0x00, -0xf9,0x00,0x02,0x00,0x0d,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x0d,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x80,0x01,0x00,0x00,0x8b,0x01,0x00,0x00,0x29,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0x0a,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x0c,0x00,0x00,0x00,0xf5,0x00,0x07,0x00,0x11,0x00,0x00,0x00, -0x92,0x01,0x00,0x00,0x85,0x01,0x00,0x00,0x0a,0x00,0x00,0x00, -0x88,0x01,0x00,0x00,0x2f,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, -0x89,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00, -0x92,0x01,0x00,0x00,0x83,0x01,0x00,0x00,0x89,0x01,0x00,0x00, -0xf8,0x00,0x02,0x00,0x89,0x01,0x00,0x00,0xf9,0x00,0x02,0x00, -0x83,0x01,0x00,0x00,0xf8,0x00,0x02,0x00,0x83,0x01,0x00,0x00, -0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, -}; -const uint64_t dequant_q5_K_len = 5888; - -unsigned char dequant_q5_K_fp32_data[] = { -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, 0xa0,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x27,0x00,0x00,0x00, 
0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00,0x11,0x00,0x02,0x00, @@ -12162,366 +6653,10 @@ unsigned char dequant_q5_K_fp32_data[] = { 0x8a,0x01,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, }; -const uint64_t dequant_q5_K_fp32_len = 5988; +const uint64_t dequant_q5_K_len = 5988; unsigned char dequant_q6_K_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0x0a,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, -0x11,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x51,0x11,0x00,0x00,0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00, -0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, -0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, -0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x0f,0x00,0x0a,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x25,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x66,0x00,0x00,0x00, -0x79,0x00,0x00,0x00,0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x17,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x23,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x23,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x23,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x33,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x5b,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x5d,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x60,0x00,0x00,0x00, 
-0x06,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x62,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x62,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x80,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x62,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0xc0,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x62,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0xd0,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x63,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0xd2,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x64,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x64,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x64,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x66,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x66,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x76,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x77,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x77,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x77,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x79,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x79,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0xff,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00, -0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x00,0x01,0x00,0x00,0x14,0x00,0x02,0x00,0x11,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x20,0x00,0x00,0x00, 
-0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x15,0x00,0x00,0x00, -0x14,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x16,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x15,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x16,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x19,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x00,0x01,0x00,0x00,0x1e,0x00,0x06,0x00,0x23,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x24,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x24,0x00,0x00,0x00,0x25,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x26,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x16,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x39,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x45,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x4b,0x00,0x00,0x00, -0x80,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x52,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x15,0x00,0x04,0x00, -0x57,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0x5a,0x00,0x00,0x00, -0x80,0x00,0x00,0x00,0x1c,0x00,0x04,0x00,0x5b,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0x5c,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x1c,0x00,0x04,0x00,0x5d,0x00,0x00,0x00,0x57,0x00,0x00,0x00, -0x5c,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x5e,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, 
-0x14,0x00,0x00,0x00,0x5f,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x1c,0x00,0x04,0x00,0x60,0x00,0x00,0x00,0x5e,0x00,0x00,0x00, -0x5f,0x00,0x00,0x00,0x16,0x00,0x03,0x00,0x61,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x1e,0x00,0x06,0x00,0x62,0x00,0x00,0x00, -0x5b,0x00,0x00,0x00,0x5d,0x00,0x00,0x00,0x60,0x00,0x00,0x00, -0x61,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x63,0x00,0x00,0x00, -0x62,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x64,0x00,0x00,0x00, -0x63,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x65,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x64,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x65,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x6c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x72,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x73,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x61,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x76,0x00,0x00,0x00,0x61,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x77,0x00,0x00,0x00,0x76,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x78,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x77,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x78,0x00,0x00,0x00, -0x79,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x81,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x5e,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x8c,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x93,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xdc,0x00,0x00,0x00, -0x60,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xe1,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0xfe,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x2c,0x00,0x06,0x00,0x15,0x00,0x00,0x00,0xff,0x00,0x00,0x00, -0x5c,0x00,0x00,0x00,0xfe,0x00,0x00,0x00,0xfe,0x00,0x00,0x00, -0x2a,0x00,0x03,0x00,0x11,0x00,0x00,0x00,0x02,0x01,0x00,0x00, -0x29,0x00,0x03,0x00,0x11,0x00,0x00,0x00,0x05,0x01,0x00,0x00, 
-0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x00,0x01,0x00,0x00, -0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00,0x18,0x00,0x00,0x00, -0x01,0x01,0x00,0x00,0xf8,0x00,0x02,0x00,0x01,0x01,0x00,0x00, -0xf9,0x00,0x02,0x00,0x0a,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x0a,0x00,0x00,0x00,0xf5,0x00,0x07,0x00,0x06,0x00,0x00,0x00, -0x08,0x01,0x00,0x00,0x09,0x00,0x00,0x00,0x01,0x01,0x00,0x00, -0xfd,0x00,0x00,0x00,0x0d,0x00,0x00,0x00,0xb1,0x00,0x05,0x00, -0x11,0x00,0x00,0x00,0x12,0x00,0x00,0x00,0x08,0x01,0x00,0x00, -0x10,0x00,0x00,0x00,0xf6,0x00,0x04,0x00,0x0c,0x00,0x00,0x00, -0x0d,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0xfa,0x00,0x04,0x00, -0x12,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x0b,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x19,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0x1b,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x14,0x00,0x00,0x00,0x1d,0x00,0x00,0x00,0x1b,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x08,0x01,0x00,0x00,0x80,0x00,0x05,0x00, -0x14,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x1d,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x26,0x00,0x00,0x00,0x27,0x00,0x00,0x00,0x25,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x28,0x00,0x00,0x00,0x27,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x26,0x00,0x00,0x00,0x2a,0x00,0x00,0x00,0x25,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x2b,0x00,0x00,0x00,0x2a,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x2c,0x00,0x00,0x00,0x28,0x00,0x00,0x00, -0x2b,0x00,0x00,0x00,0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x2c,0x00,0x00,0x00,0x10,0x00,0x00,0x00, 
-0xaf,0x00,0x05,0x00,0x11,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, -0x30,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00, -0x2e,0x00,0x00,0x00,0x2f,0x00,0x00,0x00,0x30,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x2f,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0x0c,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x30,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x19,0x00,0x00,0x00,0x34,0x00,0x00,0x00, -0x33,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0x35,0x00,0x00,0x00,0x34,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x36,0x00,0x00,0x00, -0x35,0x00,0x00,0x00,0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x36,0x00,0x00,0x00,0x39,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3e,0x00,0x00,0x00, -0x39,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x82,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x36,0x00,0x00,0x00, -0x3e,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x43,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x46,0x00,0x00,0x00, -0x3f,0x00,0x00,0x00,0x45,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x47,0x00,0x00,0x00,0x43,0x00,0x00,0x00, -0x46,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4d,0x00,0x00,0x00, -0x4b,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x4e,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0x4d,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x50,0x00,0x00,0x00,0x4e,0x00,0x00,0x00,0x3f,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x54,0x00,0x00,0x00, -0x52,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x56,0x00,0x00,0x00,0x54,0x00,0x00,0x00, -0x3f,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x6c,0x00,0x00,0x00, -0x6d,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0x09,0x00,0x00,0x00, 
-0x21,0x00,0x00,0x00,0x29,0x00,0x00,0x00,0x36,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x57,0x00,0x00,0x00,0x6e,0x00,0x00,0x00, -0x6d,0x00,0x00,0x00,0x41,0x00,0x07,0x00,0x73,0x00,0x00,0x00, -0x74,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x61,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0x74,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x81,0x00,0x00,0x00,0x82,0x00,0x00,0x00, -0x66,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x7e,0x00,0x00,0x00,0x47,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x5e,0x00,0x00,0x00,0x83,0x00,0x00,0x00,0x82,0x00,0x00,0x00, -0x72,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x84,0x00,0x00,0x00, -0x83,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x6c,0x00,0x00,0x00, -0x88,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x56,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x57,0x00,0x00,0x00,0x89,0x00,0x00,0x00, -0x88,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0x8a,0x00,0x00,0x00,0x89,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x8b,0x00,0x00,0x00,0x8a,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x8d,0x00,0x00,0x00, -0x8b,0x00,0x00,0x00,0x8c,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x57,0x00,0x00,0x00,0x8f,0x00,0x00,0x00,0x6e,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0x90,0x00,0x00,0x00,0x8f,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x91,0x00,0x00,0x00,0x90,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x92,0x00,0x00,0x00, -0x91,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0xc4,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x94,0x00,0x00,0x00,0x92,0x00,0x00,0x00, -0x93,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x95,0x00,0x00,0x00,0x8d,0x00,0x00,0x00,0x94,0x00,0x00,0x00, -0x72,0x00,0x04,0x00,0x5e,0x00,0x00,0x00,0x96,0x00,0x00,0x00, -0x95,0x00,0x00,0x00,0x72,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x97,0x00,0x00,0x00,0x96,0x00,0x00,0x00,0x82,0x00,0x05,0x00, 
-0x06,0x00,0x00,0x00,0x98,0x00,0x00,0x00,0x97,0x00,0x00,0x00, -0x39,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x99,0x00,0x00,0x00,0x84,0x00,0x00,0x00,0x98,0x00,0x00,0x00, -0x6f,0x00,0x04,0x00,0x61,0x00,0x00,0x00,0x9a,0x00,0x00,0x00, -0x99,0x00,0x00,0x00,0x85,0x00,0x05,0x00,0x61,0x00,0x00,0x00, -0x9b,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0x9a,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x73,0x00,0x00,0x00,0x9c,0x00,0x00,0x00, -0x79,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x50,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0x9c,0x00,0x00,0x00,0x9b,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x9e,0x00,0x00,0x00, -0x50,0x00,0x00,0x00,0x39,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xa2,0x00,0x00,0x00,0x47,0x00,0x00,0x00, -0x7e,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x81,0x00,0x00,0x00, -0xa3,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,0xa2,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x5e,0x00,0x00,0x00,0xa4,0x00,0x00,0x00, -0xa3,0x00,0x00,0x00,0x72,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xa5,0x00,0x00,0x00,0xa4,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xa8,0x00,0x00,0x00,0x56,0x00,0x00,0x00, -0x39,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x6c,0x00,0x00,0x00, -0xa9,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0xa8,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x57,0x00,0x00,0x00,0xaa,0x00,0x00,0x00, -0xa9,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0xab,0x00,0x00,0x00,0xaa,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xac,0x00,0x00,0x00,0xab,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xad,0x00,0x00,0x00, -0xac,0x00,0x00,0x00,0x8c,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x57,0x00,0x00,0x00,0xaf,0x00,0x00,0x00,0x6e,0x00,0x00,0x00, -0x7e,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0xb0,0x00,0x00,0x00,0xaf,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xb1,0x00,0x00,0x00,0xb0,0x00,0x00,0x00, 
-0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xb2,0x00,0x00,0x00, -0xb1,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0xc4,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xb3,0x00,0x00,0x00,0xb2,0x00,0x00,0x00, -0x93,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xb4,0x00,0x00,0x00,0xad,0x00,0x00,0x00,0xb3,0x00,0x00,0x00, -0x72,0x00,0x04,0x00,0x5e,0x00,0x00,0x00,0xb5,0x00,0x00,0x00, -0xb4,0x00,0x00,0x00,0x72,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xb6,0x00,0x00,0x00,0xb5,0x00,0x00,0x00,0x82,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xb7,0x00,0x00,0x00,0xb6,0x00,0x00,0x00, -0x39,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xb8,0x00,0x00,0x00,0xa5,0x00,0x00,0x00,0xb7,0x00,0x00,0x00, -0x6f,0x00,0x04,0x00,0x61,0x00,0x00,0x00,0xb9,0x00,0x00,0x00, -0xb8,0x00,0x00,0x00,0x85,0x00,0x05,0x00,0x61,0x00,0x00,0x00, -0xba,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0xb9,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x73,0x00,0x00,0x00,0xbb,0x00,0x00,0x00, -0x79,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x9e,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0xbb,0x00,0x00,0x00,0xba,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xbd,0x00,0x00,0x00, -0x50,0x00,0x00,0x00,0x52,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xc1,0x00,0x00,0x00,0x47,0x00,0x00,0x00, -0x93,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x81,0x00,0x00,0x00, -0xc2,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,0xc1,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x5e,0x00,0x00,0x00,0xc3,0x00,0x00,0x00, -0xc2,0x00,0x00,0x00,0x72,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xc4,0x00,0x00,0x00,0xc3,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x57,0x00,0x00,0x00,0xc9,0x00,0x00,0x00,0x88,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x57,0x00,0x00,0x00,0xca,0x00,0x00,0x00, -0xc9,0x00,0x00,0x00,0x93,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0xcb,0x00,0x00,0x00,0xca,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xcc,0x00,0x00,0x00, -0xcb,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x57,0x00,0x00,0x00, 
-0xce,0x00,0x00,0x00,0x6e,0x00,0x00,0x00,0x93,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00,0xcf,0x00,0x00,0x00, -0xce,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xd0,0x00,0x00,0x00,0xcf,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xd1,0x00,0x00,0x00,0xd0,0x00,0x00,0x00, -0x72,0x00,0x00,0x00,0xc4,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xd2,0x00,0x00,0x00,0xd1,0x00,0x00,0x00,0x93,0x00,0x00,0x00, -0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xd3,0x00,0x00,0x00, -0xcc,0x00,0x00,0x00,0xd2,0x00,0x00,0x00,0x72,0x00,0x04,0x00, -0x5e,0x00,0x00,0x00,0xd4,0x00,0x00,0x00,0xd3,0x00,0x00,0x00, -0x72,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xd5,0x00,0x00,0x00, -0xd4,0x00,0x00,0x00,0x82,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xd6,0x00,0x00,0x00,0xd5,0x00,0x00,0x00,0x39,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xd7,0x00,0x00,0x00, -0xc4,0x00,0x00,0x00,0xd6,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x61,0x00,0x00,0x00,0xd8,0x00,0x00,0x00,0xd7,0x00,0x00,0x00, -0x85,0x00,0x05,0x00,0x61,0x00,0x00,0x00,0xd9,0x00,0x00,0x00, -0x75,0x00,0x00,0x00,0xd8,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x73,0x00,0x00,0x00,0xda,0x00,0x00,0x00,0x79,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0xbd,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0xda,0x00,0x00,0x00,0xd9,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xdd,0x00,0x00,0x00,0x50,0x00,0x00,0x00, -0xdc,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xe2,0x00,0x00,0x00,0x47,0x00,0x00,0x00,0xe1,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x81,0x00,0x00,0x00,0xe3,0x00,0x00,0x00, -0x66,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x7e,0x00,0x00,0x00,0xe2,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x5e,0x00,0x00,0x00,0xe4,0x00,0x00,0x00,0xe3,0x00,0x00,0x00, -0x72,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xe5,0x00,0x00,0x00, -0xe4,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x57,0x00,0x00,0x00, -0xea,0x00,0x00,0x00,0xa9,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x57,0x00,0x00,0x00,0xeb,0x00,0x00,0x00,0xea,0x00,0x00,0x00, 
-0x93,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x14,0x00,0x00,0x00, -0xec,0x00,0x00,0x00,0xeb,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xed,0x00,0x00,0x00,0xec,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x57,0x00,0x00,0x00,0xef,0x00,0x00,0x00, -0x6e,0x00,0x00,0x00,0xe1,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x14,0x00,0x00,0x00,0xf0,0x00,0x00,0x00,0xef,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xf1,0x00,0x00,0x00, -0xf0,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xf2,0x00,0x00,0x00,0xf1,0x00,0x00,0x00,0x72,0x00,0x00,0x00, -0xc4,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xf3,0x00,0x00,0x00, -0xf2,0x00,0x00,0x00,0x93,0x00,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xf4,0x00,0x00,0x00,0xed,0x00,0x00,0x00, -0xf3,0x00,0x00,0x00,0x72,0x00,0x04,0x00,0x5e,0x00,0x00,0x00, -0xf5,0x00,0x00,0x00,0xf4,0x00,0x00,0x00,0x72,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xf6,0x00,0x00,0x00,0xf5,0x00,0x00,0x00, -0x82,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xf7,0x00,0x00,0x00, -0xf6,0x00,0x00,0x00,0x39,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xf8,0x00,0x00,0x00,0xe5,0x00,0x00,0x00, -0xf7,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x61,0x00,0x00,0x00, -0xf9,0x00,0x00,0x00,0xf8,0x00,0x00,0x00,0x85,0x00,0x05,0x00, -0x61,0x00,0x00,0x00,0xfa,0x00,0x00,0x00,0x75,0x00,0x00,0x00, -0xf9,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x73,0x00,0x00,0x00, -0xfb,0x00,0x00,0x00,0x79,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0xdd,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0xfb,0x00,0x00,0x00, -0xfa,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x0d,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x0d,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xfd,0x00,0x00,0x00,0x08,0x01,0x00,0x00, -0x29,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x0a,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x0c,0x00,0x00,0x00,0xf5,0x00,0x07,0x00, -0x11,0x00,0x00,0x00,0x09,0x01,0x00,0x00,0x02,0x01,0x00,0x00, -0x0a,0x00,0x00,0x00,0x05,0x01,0x00,0x00,0x2f,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0x06,0x01,0x00,0x00,0x00,0x00,0x00,0x00, 
-0xfa,0x00,0x04,0x00,0x09,0x01,0x00,0x00,0x00,0x01,0x00,0x00, -0x06,0x01,0x00,0x00,0xf8,0x00,0x02,0x00,0x06,0x01,0x00,0x00, -0xf9,0x00,0x02,0x00,0x00,0x01,0x00,0x00,0xf8,0x00,0x02,0x00, -0x00,0x01,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, - -}; -const uint64_t dequant_q6_K_len = 4212; - -unsigned char dequant_q6_K_fp32_data[] = { -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, 0x10,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x27,0x00,0x00,0x00, 0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00,0x11,0x00,0x02,0x00, @@ -12881,647 +7016,10 @@ unsigned char dequant_q6_K_fp32_data[] = { 0x06,0x01,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, }; -const uint64_t dequant_q6_K_fp32_len = 4296; +const uint64_t dequant_q6_K_len = 4296; unsigned char dequant_q8_0_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0xd2,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, -0x11,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x51,0x11,0x00,0x00,0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00, -0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, -0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, -0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x0f,0x00,0x09,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x6e,0x00,0x00,0x00, -0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x00,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x0c,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x14,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x14,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x05,0x00, 
-0x14,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x14,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x14,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x4f,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x4f,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x50,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x48,0x00,0x04,0x00,0x51,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x51,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x51,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x53,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x6b,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x48,0x00,0x04,0x00,0x6c,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x6c,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x6c,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x6e,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x6e,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x8e,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00, -0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x0a,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00, 
-0x0b,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x0a,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x0b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x0d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x0e,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x1e,0x00,0x06,0x00,0x14,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x15,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x14,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x15,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x18,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x1b,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x14,0x00,0x02,0x00, -0x24,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x16,0x00,0x03,0x00,0x49,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x4c,0x00,0x00,0x00,0x08,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x4d,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x1c,0x00,0x04,0x00, -0x4e,0x00,0x00,0x00,0x4c,0x00,0x00,0x00,0x4d,0x00,0x00,0x00, -0x1e,0x00,0x04,0x00,0x4f,0x00,0x00,0x00,0x49,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x50,0x00,0x00,0x00, -0x4f,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x51,0x00,0x00,0x00, -0x50,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x52,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x52,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x55,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x49,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x58,0x00,0x00,0x00, -0x49,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x5d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x4c,0x00,0x00,0x00, 
-0x1d,0x00,0x03,0x00,0x6b,0x00,0x00,0x00,0x49,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x6c,0x00,0x00,0x00,0x6b,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x6d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x6c,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x6d,0x00,0x00,0x00, -0x6e,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x70,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x87,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x8d,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x2c,0x00,0x06,0x00, -0x0a,0x00,0x00,0x00,0x8e,0x00,0x00,0x00,0x8d,0x00,0x00,0x00, -0x87,0x00,0x00,0x00,0x87,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xb6,0x02,0x00,0x00,0x04,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xb7,0x02,0x00,0x00, -0x05,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xb8,0x02,0x00,0x00,0x06,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xb9,0x02,0x00,0x00,0x07,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xba,0x02,0x00,0x00, -0x08,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xbb,0x02,0x00,0x00,0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xbc,0x02,0x00,0x00,0x0a,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xbd,0x02,0x00,0x00, -0x0b,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xbe,0x02,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xbf,0x02,0x00,0x00,0x0d,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xc0,0x02,0x00,0x00, -0x0e,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xc1,0x02,0x00,0x00,0x0f,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xc2,0x02,0x00,0x00,0x10,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xc3,0x02,0x00,0x00, -0x11,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xc4,0x02,0x00,0x00,0x12,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xc5,0x02,0x00,0x00,0x13,0x00,0x00,0x00, 
-0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xc6,0x02,0x00,0x00, -0x14,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xc7,0x02,0x00,0x00,0x15,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xc8,0x02,0x00,0x00,0x16,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xc9,0x02,0x00,0x00, -0x17,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xca,0x02,0x00,0x00,0x18,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xcb,0x02,0x00,0x00,0x19,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xcc,0x02,0x00,0x00, -0x1a,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xcd,0x02,0x00,0x00,0x1b,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xce,0x02,0x00,0x00,0x1c,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xcf,0x02,0x00,0x00, -0x1d,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xd0,0x02,0x00,0x00,0x1e,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xd1,0x02,0x00,0x00,0x1f,0x00,0x00,0x00, -0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x8f,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00,0x0d,0x00,0x00,0x00, -0x90,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x90,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x0e,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x0d,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x18,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x1b,0x00,0x00,0x00, -0x8b,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x1d,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x87,0x00,0x05,0x00, 
-0x06,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x26,0x00,0x00,0x00,0x1d,0x00,0x00,0x00,0x1b,0x00,0x00,0x00, -0xaf,0x00,0x05,0x00,0x24,0x00,0x00,0x00,0x29,0x00,0x00,0x00, -0x26,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0xa8,0x00,0x04,0x00, -0x24,0x00,0x00,0x00,0x2a,0x00,0x00,0x00,0x29,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0x2c,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfa,0x00,0x04,0x00,0x2a,0x00,0x00,0x00,0x2b,0x00,0x00,0x00, -0x2c,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x2b,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x18,0x00,0x00,0x00,0x2f,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x30,0x00,0x00,0x00,0x2f,0x00,0x00,0x00, -0xaf,0x00,0x05,0x00,0x24,0x00,0x00,0x00,0x31,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x30,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0x2c,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x2c,0x00,0x00,0x00, -0xf5,0x00,0x07,0x00,0x24,0x00,0x00,0x00,0x32,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0x90,0x00,0x00,0x00,0x31,0x00,0x00,0x00, -0x2b,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x34,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x32,0x00,0x00,0x00, -0x33,0x00,0x00,0x00,0x34,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x33,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x8f,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x34,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x18,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x39,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x87,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x39,0x00,0x00,0x00, -0x1b,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x3e,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x3e,0x00,0x00,0x00,0x1d,0x00,0x00,0x00,0x41,0x00,0x07,0x00, -0x55,0x00,0x00,0x00,0x56,0x00,0x00,0x00,0x53,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, 
-0x3d,0x00,0x04,0x00,0x49,0x00,0x00,0x00,0x57,0x00,0x00,0x00, -0x56,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00, -0x5e,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4c,0x00,0x00,0x00,0x5f,0x00,0x00,0x00, -0x5e,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00, -0x60,0x00,0x00,0x00,0x5f,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x5d,0x00,0x00,0x00,0x64,0x00,0x00,0x00,0x53,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4c,0x00,0x00,0x00, -0x65,0x00,0x00,0x00,0x64,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x49,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0x65,0x00,0x00,0x00, -0x50,0x00,0x05,0x00,0x58,0x00,0x00,0x00,0x67,0x00,0x00,0x00, -0x60,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0x8e,0x00,0x05,0x00, -0x58,0x00,0x00,0x00,0x6a,0x00,0x00,0x00,0x67,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x18,0x00,0x00,0x00, -0x71,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x70,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x72,0x00,0x00,0x00, -0x71,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x73,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x72,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x76,0x00,0x00,0x00, -0x73,0x00,0x00,0x00,0x26,0x00,0x00,0x00,0x51,0x00,0x05,0x00, -0x49,0x00,0x00,0x00,0x7b,0x00,0x00,0x00,0x6a,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, -0x7c,0x00,0x00,0x00,0x6e,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x76,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0x7c,0x00,0x00,0x00, -0x7b,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x86,0x00,0x00,0x00,0x76,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x51,0x00,0x05,0x00,0x49,0x00,0x00,0x00,0x89,0x00,0x00,0x00, -0x6a,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0x8a,0x00,0x00,0x00,0x6e,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x86,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, 
-0x8a,0x00,0x00,0x00,0x89,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x49,0x00,0x00,0x00,0x9f,0x00,0x00,0x00,0x56,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00,0xa0,0x00,0x00,0x00, -0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4c,0x00,0x00,0x00,0xa1,0x00,0x00,0x00,0xa0,0x00,0x00,0x00, -0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00,0xa2,0x00,0x00,0x00, -0xa1,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00, -0xa4,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x70,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4c,0x00,0x00,0x00,0xa5,0x00,0x00,0x00, -0xa4,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00, -0xa6,0x00,0x00,0x00,0xa5,0x00,0x00,0x00,0x50,0x00,0x05,0x00, -0x58,0x00,0x00,0x00,0xa7,0x00,0x00,0x00,0xa2,0x00,0x00,0x00, -0xa6,0x00,0x00,0x00,0x8e,0x00,0x05,0x00,0x58,0x00,0x00,0x00, -0xa8,0x00,0x00,0x00,0xa7,0x00,0x00,0x00,0x9f,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xae,0x00,0x00,0x00, -0x76,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x51,0x00,0x05,0x00, -0x49,0x00,0x00,0x00,0xb0,0x00,0x00,0x00,0xa8,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, -0xb1,0x00,0x00,0x00,0x6e,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xae,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0xb1,0x00,0x00,0x00, -0xb0,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xb8,0x00,0x00,0x00,0x76,0x00,0x00,0x00,0x70,0x00,0x00,0x00, -0x51,0x00,0x05,0x00,0x49,0x00,0x00,0x00,0xb9,0x00,0x00,0x00, -0xa8,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0xba,0x00,0x00,0x00,0x6e,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xb8,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0xba,0x00,0x00,0x00,0xb9,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x49,0x00,0x00,0x00,0xc3,0x00,0x00,0x00,0x56,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00,0xc4,0x00,0x00,0x00, -0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, 
-0x17,0x00,0x00,0x00,0xb6,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4c,0x00,0x00,0x00,0xc5,0x00,0x00,0x00,0xc4,0x00,0x00,0x00, -0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00,0xc6,0x00,0x00,0x00, -0xc5,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00, -0xc8,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0xb7,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4c,0x00,0x00,0x00,0xc9,0x00,0x00,0x00, -0xc8,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00, -0xca,0x00,0x00,0x00,0xc9,0x00,0x00,0x00,0x50,0x00,0x05,0x00, -0x58,0x00,0x00,0x00,0xcb,0x00,0x00,0x00,0xc6,0x00,0x00,0x00, -0xca,0x00,0x00,0x00,0x8e,0x00,0x05,0x00,0x58,0x00,0x00,0x00, -0xcc,0x00,0x00,0x00,0xcb,0x00,0x00,0x00,0xc3,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xd2,0x00,0x00,0x00, -0x76,0x00,0x00,0x00,0xb6,0x02,0x00,0x00,0x51,0x00,0x05,0x00, -0x49,0x00,0x00,0x00,0xd4,0x00,0x00,0x00,0xcc,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, -0xd5,0x00,0x00,0x00,0x6e,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xd2,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0xd5,0x00,0x00,0x00, -0xd4,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xdc,0x00,0x00,0x00,0x76,0x00,0x00,0x00,0xb7,0x02,0x00,0x00, -0x51,0x00,0x05,0x00,0x49,0x00,0x00,0x00,0xdd,0x00,0x00,0x00, -0xcc,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0xde,0x00,0x00,0x00,0x6e,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xdc,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0xde,0x00,0x00,0x00,0xdd,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x49,0x00,0x00,0x00,0xe7,0x00,0x00,0x00,0x56,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00,0xe8,0x00,0x00,0x00, -0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0xb8,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4c,0x00,0x00,0x00,0xe9,0x00,0x00,0x00,0xe8,0x00,0x00,0x00, -0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00,0xea,0x00,0x00,0x00, -0xe9,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00, 
-0xec,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0xb9,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4c,0x00,0x00,0x00,0xed,0x00,0x00,0x00, -0xec,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00, -0xee,0x00,0x00,0x00,0xed,0x00,0x00,0x00,0x50,0x00,0x05,0x00, -0x58,0x00,0x00,0x00,0xef,0x00,0x00,0x00,0xea,0x00,0x00,0x00, -0xee,0x00,0x00,0x00,0x8e,0x00,0x05,0x00,0x58,0x00,0x00,0x00, -0xf0,0x00,0x00,0x00,0xef,0x00,0x00,0x00,0xe7,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xf6,0x00,0x00,0x00, -0x76,0x00,0x00,0x00,0xb8,0x02,0x00,0x00,0x51,0x00,0x05,0x00, -0x49,0x00,0x00,0x00,0xf8,0x00,0x00,0x00,0xf0,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, -0xf9,0x00,0x00,0x00,0x6e,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xf6,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0xf9,0x00,0x00,0x00, -0xf8,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x00,0x01,0x00,0x00,0x76,0x00,0x00,0x00,0xb9,0x02,0x00,0x00, -0x51,0x00,0x05,0x00,0x49,0x00,0x00,0x00,0x01,0x01,0x00,0x00, -0xf0,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0x02,0x01,0x00,0x00,0x6e,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0x02,0x01,0x00,0x00,0x01,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x49,0x00,0x00,0x00,0x0b,0x01,0x00,0x00,0x56,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00,0x0c,0x01,0x00,0x00, -0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0xba,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4c,0x00,0x00,0x00,0x0d,0x01,0x00,0x00,0x0c,0x01,0x00,0x00, -0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00,0x0e,0x01,0x00,0x00, -0x0d,0x01,0x00,0x00,0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00, -0x10,0x01,0x00,0x00,0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0xbb,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4c,0x00,0x00,0x00,0x11,0x01,0x00,0x00, -0x10,0x01,0x00,0x00,0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00, 
-0x12,0x01,0x00,0x00,0x11,0x01,0x00,0x00,0x50,0x00,0x05,0x00, -0x58,0x00,0x00,0x00,0x13,0x01,0x00,0x00,0x0e,0x01,0x00,0x00, -0x12,0x01,0x00,0x00,0x8e,0x00,0x05,0x00,0x58,0x00,0x00,0x00, -0x14,0x01,0x00,0x00,0x13,0x01,0x00,0x00,0x0b,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x1a,0x01,0x00,0x00, -0x76,0x00,0x00,0x00,0xba,0x02,0x00,0x00,0x51,0x00,0x05,0x00, -0x49,0x00,0x00,0x00,0x1c,0x01,0x00,0x00,0x14,0x01,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, -0x1d,0x01,0x00,0x00,0x6e,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x1a,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0x1d,0x01,0x00,0x00, -0x1c,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x24,0x01,0x00,0x00,0x76,0x00,0x00,0x00,0xbb,0x02,0x00,0x00, -0x51,0x00,0x05,0x00,0x49,0x00,0x00,0x00,0x25,0x01,0x00,0x00, -0x14,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0x26,0x01,0x00,0x00,0x6e,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x24,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0x26,0x01,0x00,0x00,0x25,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x49,0x00,0x00,0x00,0x2f,0x01,0x00,0x00,0x56,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00,0x30,0x01,0x00,0x00, -0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0xbc,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4c,0x00,0x00,0x00,0x31,0x01,0x00,0x00,0x30,0x01,0x00,0x00, -0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00,0x32,0x01,0x00,0x00, -0x31,0x01,0x00,0x00,0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00, -0x34,0x01,0x00,0x00,0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0xbd,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4c,0x00,0x00,0x00,0x35,0x01,0x00,0x00, -0x34,0x01,0x00,0x00,0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00, -0x36,0x01,0x00,0x00,0x35,0x01,0x00,0x00,0x50,0x00,0x05,0x00, -0x58,0x00,0x00,0x00,0x37,0x01,0x00,0x00,0x32,0x01,0x00,0x00, -0x36,0x01,0x00,0x00,0x8e,0x00,0x05,0x00,0x58,0x00,0x00,0x00, -0x38,0x01,0x00,0x00,0x37,0x01,0x00,0x00,0x2f,0x01,0x00,0x00, 
-0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3e,0x01,0x00,0x00, -0x76,0x00,0x00,0x00,0xbc,0x02,0x00,0x00,0x51,0x00,0x05,0x00, -0x49,0x00,0x00,0x00,0x40,0x01,0x00,0x00,0x38,0x01,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, -0x41,0x01,0x00,0x00,0x6e,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x3e,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0x41,0x01,0x00,0x00, -0x40,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x48,0x01,0x00,0x00,0x76,0x00,0x00,0x00,0xbd,0x02,0x00,0x00, -0x51,0x00,0x05,0x00,0x49,0x00,0x00,0x00,0x49,0x01,0x00,0x00, -0x38,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0x4a,0x01,0x00,0x00,0x6e,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x48,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0x4a,0x01,0x00,0x00,0x49,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x49,0x00,0x00,0x00,0x53,0x01,0x00,0x00,0x56,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00,0x54,0x01,0x00,0x00, -0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0xbe,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4c,0x00,0x00,0x00,0x55,0x01,0x00,0x00,0x54,0x01,0x00,0x00, -0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00,0x56,0x01,0x00,0x00, -0x55,0x01,0x00,0x00,0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00, -0x58,0x01,0x00,0x00,0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0xbf,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4c,0x00,0x00,0x00,0x59,0x01,0x00,0x00, -0x58,0x01,0x00,0x00,0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00, -0x5a,0x01,0x00,0x00,0x59,0x01,0x00,0x00,0x50,0x00,0x05,0x00, -0x58,0x00,0x00,0x00,0x5b,0x01,0x00,0x00,0x56,0x01,0x00,0x00, -0x5a,0x01,0x00,0x00,0x8e,0x00,0x05,0x00,0x58,0x00,0x00,0x00, -0x5c,0x01,0x00,0x00,0x5b,0x01,0x00,0x00,0x53,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x62,0x01,0x00,0x00, -0x76,0x00,0x00,0x00,0xbe,0x02,0x00,0x00,0x51,0x00,0x05,0x00, -0x49,0x00,0x00,0x00,0x64,0x01,0x00,0x00,0x5c,0x01,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, 
-0x65,0x01,0x00,0x00,0x6e,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x62,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0x65,0x01,0x00,0x00, -0x64,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x6c,0x01,0x00,0x00,0x76,0x00,0x00,0x00,0xbf,0x02,0x00,0x00, -0x51,0x00,0x05,0x00,0x49,0x00,0x00,0x00,0x6d,0x01,0x00,0x00, -0x5c,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0x6e,0x01,0x00,0x00,0x6e,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x6c,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0x6e,0x01,0x00,0x00,0x6d,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x49,0x00,0x00,0x00,0x77,0x01,0x00,0x00,0x56,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00,0x78,0x01,0x00,0x00, -0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0xc0,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4c,0x00,0x00,0x00,0x79,0x01,0x00,0x00,0x78,0x01,0x00,0x00, -0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00,0x7a,0x01,0x00,0x00, -0x79,0x01,0x00,0x00,0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00, -0x7c,0x01,0x00,0x00,0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0xc1,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4c,0x00,0x00,0x00,0x7d,0x01,0x00,0x00, -0x7c,0x01,0x00,0x00,0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00, -0x7e,0x01,0x00,0x00,0x7d,0x01,0x00,0x00,0x50,0x00,0x05,0x00, -0x58,0x00,0x00,0x00,0x7f,0x01,0x00,0x00,0x7a,0x01,0x00,0x00, -0x7e,0x01,0x00,0x00,0x8e,0x00,0x05,0x00,0x58,0x00,0x00,0x00, -0x80,0x01,0x00,0x00,0x7f,0x01,0x00,0x00,0x77,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x86,0x01,0x00,0x00, -0x76,0x00,0x00,0x00,0xc0,0x02,0x00,0x00,0x51,0x00,0x05,0x00, -0x49,0x00,0x00,0x00,0x88,0x01,0x00,0x00,0x80,0x01,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, -0x89,0x01,0x00,0x00,0x6e,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x86,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0x89,0x01,0x00,0x00, -0x88,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x90,0x01,0x00,0x00,0x76,0x00,0x00,0x00,0xc1,0x02,0x00,0x00, 
-0x51,0x00,0x05,0x00,0x49,0x00,0x00,0x00,0x91,0x01,0x00,0x00, -0x80,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0x92,0x01,0x00,0x00,0x6e,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x90,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0x92,0x01,0x00,0x00,0x91,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x49,0x00,0x00,0x00,0x9b,0x01,0x00,0x00,0x56,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00,0x9c,0x01,0x00,0x00, -0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0xc2,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4c,0x00,0x00,0x00,0x9d,0x01,0x00,0x00,0x9c,0x01,0x00,0x00, -0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00,0x9e,0x01,0x00,0x00, -0x9d,0x01,0x00,0x00,0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00, -0xa0,0x01,0x00,0x00,0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0xc3,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4c,0x00,0x00,0x00,0xa1,0x01,0x00,0x00, -0xa0,0x01,0x00,0x00,0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00, -0xa2,0x01,0x00,0x00,0xa1,0x01,0x00,0x00,0x50,0x00,0x05,0x00, -0x58,0x00,0x00,0x00,0xa3,0x01,0x00,0x00,0x9e,0x01,0x00,0x00, -0xa2,0x01,0x00,0x00,0x8e,0x00,0x05,0x00,0x58,0x00,0x00,0x00, -0xa4,0x01,0x00,0x00,0xa3,0x01,0x00,0x00,0x9b,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xaa,0x01,0x00,0x00, -0x76,0x00,0x00,0x00,0xc2,0x02,0x00,0x00,0x51,0x00,0x05,0x00, -0x49,0x00,0x00,0x00,0xac,0x01,0x00,0x00,0xa4,0x01,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, -0xad,0x01,0x00,0x00,0x6e,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xaa,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0xad,0x01,0x00,0x00, -0xac,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xb4,0x01,0x00,0x00,0x76,0x00,0x00,0x00,0xc3,0x02,0x00,0x00, -0x51,0x00,0x05,0x00,0x49,0x00,0x00,0x00,0xb5,0x01,0x00,0x00, -0xa4,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0xb6,0x01,0x00,0x00,0x6e,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xb4,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, 
-0xb6,0x01,0x00,0x00,0xb5,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x49,0x00,0x00,0x00,0xbf,0x01,0x00,0x00,0x56,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00,0xc0,0x01,0x00,0x00, -0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0xc4,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4c,0x00,0x00,0x00,0xc1,0x01,0x00,0x00,0xc0,0x01,0x00,0x00, -0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00,0xc2,0x01,0x00,0x00, -0xc1,0x01,0x00,0x00,0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00, -0xc4,0x01,0x00,0x00,0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0xc5,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4c,0x00,0x00,0x00,0xc5,0x01,0x00,0x00, -0xc4,0x01,0x00,0x00,0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00, -0xc6,0x01,0x00,0x00,0xc5,0x01,0x00,0x00,0x50,0x00,0x05,0x00, -0x58,0x00,0x00,0x00,0xc7,0x01,0x00,0x00,0xc2,0x01,0x00,0x00, -0xc6,0x01,0x00,0x00,0x8e,0x00,0x05,0x00,0x58,0x00,0x00,0x00, -0xc8,0x01,0x00,0x00,0xc7,0x01,0x00,0x00,0xbf,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xce,0x01,0x00,0x00, -0x76,0x00,0x00,0x00,0xc4,0x02,0x00,0x00,0x51,0x00,0x05,0x00, -0x49,0x00,0x00,0x00,0xd0,0x01,0x00,0x00,0xc8,0x01,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, -0xd1,0x01,0x00,0x00,0x6e,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xce,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0xd1,0x01,0x00,0x00, -0xd0,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xd8,0x01,0x00,0x00,0x76,0x00,0x00,0x00,0xc5,0x02,0x00,0x00, -0x51,0x00,0x05,0x00,0x49,0x00,0x00,0x00,0xd9,0x01,0x00,0x00, -0xc8,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0xda,0x01,0x00,0x00,0x6e,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xd8,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0xda,0x01,0x00,0x00,0xd9,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x49,0x00,0x00,0x00,0xe3,0x01,0x00,0x00,0x56,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00,0xe4,0x01,0x00,0x00, -0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, 
-0x17,0x00,0x00,0x00,0xc6,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4c,0x00,0x00,0x00,0xe5,0x01,0x00,0x00,0xe4,0x01,0x00,0x00, -0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00,0xe6,0x01,0x00,0x00, -0xe5,0x01,0x00,0x00,0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00, -0xe8,0x01,0x00,0x00,0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0xc7,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4c,0x00,0x00,0x00,0xe9,0x01,0x00,0x00, -0xe8,0x01,0x00,0x00,0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00, -0xea,0x01,0x00,0x00,0xe9,0x01,0x00,0x00,0x50,0x00,0x05,0x00, -0x58,0x00,0x00,0x00,0xeb,0x01,0x00,0x00,0xe6,0x01,0x00,0x00, -0xea,0x01,0x00,0x00,0x8e,0x00,0x05,0x00,0x58,0x00,0x00,0x00, -0xec,0x01,0x00,0x00,0xeb,0x01,0x00,0x00,0xe3,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xf2,0x01,0x00,0x00, -0x76,0x00,0x00,0x00,0xc6,0x02,0x00,0x00,0x51,0x00,0x05,0x00, -0x49,0x00,0x00,0x00,0xf4,0x01,0x00,0x00,0xec,0x01,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, -0xf5,0x01,0x00,0x00,0x6e,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xf2,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0xf5,0x01,0x00,0x00, -0xf4,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xfc,0x01,0x00,0x00,0x76,0x00,0x00,0x00,0xc7,0x02,0x00,0x00, -0x51,0x00,0x05,0x00,0x49,0x00,0x00,0x00,0xfd,0x01,0x00,0x00, -0xec,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0xfe,0x01,0x00,0x00,0x6e,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xfc,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0xfe,0x01,0x00,0x00,0xfd,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x49,0x00,0x00,0x00,0x07,0x02,0x00,0x00,0x56,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00,0x08,0x02,0x00,0x00, -0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0xc8,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4c,0x00,0x00,0x00,0x09,0x02,0x00,0x00,0x08,0x02,0x00,0x00, -0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00,0x0a,0x02,0x00,0x00, -0x09,0x02,0x00,0x00,0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00, 
-0x0c,0x02,0x00,0x00,0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0xc9,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4c,0x00,0x00,0x00,0x0d,0x02,0x00,0x00, -0x0c,0x02,0x00,0x00,0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00, -0x0e,0x02,0x00,0x00,0x0d,0x02,0x00,0x00,0x50,0x00,0x05,0x00, -0x58,0x00,0x00,0x00,0x0f,0x02,0x00,0x00,0x0a,0x02,0x00,0x00, -0x0e,0x02,0x00,0x00,0x8e,0x00,0x05,0x00,0x58,0x00,0x00,0x00, -0x10,0x02,0x00,0x00,0x0f,0x02,0x00,0x00,0x07,0x02,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x16,0x02,0x00,0x00, -0x76,0x00,0x00,0x00,0xc8,0x02,0x00,0x00,0x51,0x00,0x05,0x00, -0x49,0x00,0x00,0x00,0x18,0x02,0x00,0x00,0x10,0x02,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, -0x19,0x02,0x00,0x00,0x6e,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x16,0x02,0x00,0x00,0x3e,0x00,0x03,0x00,0x19,0x02,0x00,0x00, -0x18,0x02,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x20,0x02,0x00,0x00,0x76,0x00,0x00,0x00,0xc9,0x02,0x00,0x00, -0x51,0x00,0x05,0x00,0x49,0x00,0x00,0x00,0x21,0x02,0x00,0x00, -0x10,0x02,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0x22,0x02,0x00,0x00,0x6e,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x20,0x02,0x00,0x00,0x3e,0x00,0x03,0x00, -0x22,0x02,0x00,0x00,0x21,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x49,0x00,0x00,0x00,0x2b,0x02,0x00,0x00,0x56,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00,0x2c,0x02,0x00,0x00, -0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0xca,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4c,0x00,0x00,0x00,0x2d,0x02,0x00,0x00,0x2c,0x02,0x00,0x00, -0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00,0x2e,0x02,0x00,0x00, -0x2d,0x02,0x00,0x00,0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00, -0x30,0x02,0x00,0x00,0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0xcb,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4c,0x00,0x00,0x00,0x31,0x02,0x00,0x00, -0x30,0x02,0x00,0x00,0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00, 
-0x32,0x02,0x00,0x00,0x31,0x02,0x00,0x00,0x50,0x00,0x05,0x00, -0x58,0x00,0x00,0x00,0x33,0x02,0x00,0x00,0x2e,0x02,0x00,0x00, -0x32,0x02,0x00,0x00,0x8e,0x00,0x05,0x00,0x58,0x00,0x00,0x00, -0x34,0x02,0x00,0x00,0x33,0x02,0x00,0x00,0x2b,0x02,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3a,0x02,0x00,0x00, -0x76,0x00,0x00,0x00,0xca,0x02,0x00,0x00,0x51,0x00,0x05,0x00, -0x49,0x00,0x00,0x00,0x3c,0x02,0x00,0x00,0x34,0x02,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, -0x3d,0x02,0x00,0x00,0x6e,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x3a,0x02,0x00,0x00,0x3e,0x00,0x03,0x00,0x3d,0x02,0x00,0x00, -0x3c,0x02,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x44,0x02,0x00,0x00,0x76,0x00,0x00,0x00,0xcb,0x02,0x00,0x00, -0x51,0x00,0x05,0x00,0x49,0x00,0x00,0x00,0x45,0x02,0x00,0x00, -0x34,0x02,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0x46,0x02,0x00,0x00,0x6e,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x44,0x02,0x00,0x00,0x3e,0x00,0x03,0x00, -0x46,0x02,0x00,0x00,0x45,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x49,0x00,0x00,0x00,0x4f,0x02,0x00,0x00,0x56,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00,0x50,0x02,0x00,0x00, -0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0xcc,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4c,0x00,0x00,0x00,0x51,0x02,0x00,0x00,0x50,0x02,0x00,0x00, -0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00,0x52,0x02,0x00,0x00, -0x51,0x02,0x00,0x00,0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00, -0x54,0x02,0x00,0x00,0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0xcd,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4c,0x00,0x00,0x00,0x55,0x02,0x00,0x00, -0x54,0x02,0x00,0x00,0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00, -0x56,0x02,0x00,0x00,0x55,0x02,0x00,0x00,0x50,0x00,0x05,0x00, -0x58,0x00,0x00,0x00,0x57,0x02,0x00,0x00,0x52,0x02,0x00,0x00, -0x56,0x02,0x00,0x00,0x8e,0x00,0x05,0x00,0x58,0x00,0x00,0x00, -0x58,0x02,0x00,0x00,0x57,0x02,0x00,0x00,0x4f,0x02,0x00,0x00, 
-0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x5e,0x02,0x00,0x00, -0x76,0x00,0x00,0x00,0xcc,0x02,0x00,0x00,0x51,0x00,0x05,0x00, -0x49,0x00,0x00,0x00,0x60,0x02,0x00,0x00,0x58,0x02,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, -0x61,0x02,0x00,0x00,0x6e,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x5e,0x02,0x00,0x00,0x3e,0x00,0x03,0x00,0x61,0x02,0x00,0x00, -0x60,0x02,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x68,0x02,0x00,0x00,0x76,0x00,0x00,0x00,0xcd,0x02,0x00,0x00, -0x51,0x00,0x05,0x00,0x49,0x00,0x00,0x00,0x69,0x02,0x00,0x00, -0x58,0x02,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0x6a,0x02,0x00,0x00,0x6e,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x68,0x02,0x00,0x00,0x3e,0x00,0x03,0x00, -0x6a,0x02,0x00,0x00,0x69,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x49,0x00,0x00,0x00,0x73,0x02,0x00,0x00,0x56,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00,0x74,0x02,0x00,0x00, -0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0xce,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4c,0x00,0x00,0x00,0x75,0x02,0x00,0x00,0x74,0x02,0x00,0x00, -0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00,0x76,0x02,0x00,0x00, -0x75,0x02,0x00,0x00,0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00, -0x78,0x02,0x00,0x00,0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0xcf,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4c,0x00,0x00,0x00,0x79,0x02,0x00,0x00, -0x78,0x02,0x00,0x00,0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00, -0x7a,0x02,0x00,0x00,0x79,0x02,0x00,0x00,0x50,0x00,0x05,0x00, -0x58,0x00,0x00,0x00,0x7b,0x02,0x00,0x00,0x76,0x02,0x00,0x00, -0x7a,0x02,0x00,0x00,0x8e,0x00,0x05,0x00,0x58,0x00,0x00,0x00, -0x7c,0x02,0x00,0x00,0x7b,0x02,0x00,0x00,0x73,0x02,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x82,0x02,0x00,0x00, -0x76,0x00,0x00,0x00,0xce,0x02,0x00,0x00,0x51,0x00,0x05,0x00, -0x49,0x00,0x00,0x00,0x84,0x02,0x00,0x00,0x7c,0x02,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, 
-0x85,0x02,0x00,0x00,0x6e,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x82,0x02,0x00,0x00,0x3e,0x00,0x03,0x00,0x85,0x02,0x00,0x00, -0x84,0x02,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x8c,0x02,0x00,0x00,0x76,0x00,0x00,0x00,0xcf,0x02,0x00,0x00, -0x51,0x00,0x05,0x00,0x49,0x00,0x00,0x00,0x8d,0x02,0x00,0x00, -0x7c,0x02,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0x8e,0x02,0x00,0x00,0x6e,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x8c,0x02,0x00,0x00,0x3e,0x00,0x03,0x00, -0x8e,0x02,0x00,0x00,0x8d,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x49,0x00,0x00,0x00,0x97,0x02,0x00,0x00,0x56,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00,0x98,0x02,0x00,0x00, -0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0xd0,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4c,0x00,0x00,0x00,0x99,0x02,0x00,0x00,0x98,0x02,0x00,0x00, -0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00,0x9a,0x02,0x00,0x00, -0x99,0x02,0x00,0x00,0x41,0x00,0x08,0x00,0x5d,0x00,0x00,0x00, -0x9c,0x02,0x00,0x00,0x53,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0xd1,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4c,0x00,0x00,0x00,0x9d,0x02,0x00,0x00, -0x9c,0x02,0x00,0x00,0x6f,0x00,0x04,0x00,0x49,0x00,0x00,0x00, -0x9e,0x02,0x00,0x00,0x9d,0x02,0x00,0x00,0x50,0x00,0x05,0x00, -0x58,0x00,0x00,0x00,0x9f,0x02,0x00,0x00,0x9a,0x02,0x00,0x00, -0x9e,0x02,0x00,0x00,0x8e,0x00,0x05,0x00,0x58,0x00,0x00,0x00, -0xa0,0x02,0x00,0x00,0x9f,0x02,0x00,0x00,0x97,0x02,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xa6,0x02,0x00,0x00, -0x76,0x00,0x00,0x00,0xd0,0x02,0x00,0x00,0x51,0x00,0x05,0x00, -0x49,0x00,0x00,0x00,0xa8,0x02,0x00,0x00,0xa0,0x02,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, -0xa9,0x02,0x00,0x00,0x6e,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xa6,0x02,0x00,0x00,0x3e,0x00,0x03,0x00,0xa9,0x02,0x00,0x00, -0xa8,0x02,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xb0,0x02,0x00,0x00,0x76,0x00,0x00,0x00,0xd1,0x02,0x00,0x00, 
-0x51,0x00,0x05,0x00,0x49,0x00,0x00,0x00,0xb1,0x02,0x00,0x00, -0xa0,0x02,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0xb2,0x02,0x00,0x00,0x6e,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xb0,0x02,0x00,0x00,0x3e,0x00,0x03,0x00, -0xb2,0x02,0x00,0x00,0xb1,0x02,0x00,0x00,0xf9,0x00,0x02,0x00, -0x8f,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x8f,0x00,0x00,0x00, -0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, -}; -const uint64_t dequant_q8_0_len = 7592; - -unsigned char dequant_q8_0_fp32_data[] = { -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, 0x23,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, 0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00,0x0b,0x00,0x06,0x00, @@ -14262,7 +7760,7 @@ unsigned char dequant_q8_0_fp32_data[] = { 0x95,0x00,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, }; -const uint64_t dequant_q8_0_fp32_len = 8868; +const uint64_t dequant_q8_0_len = 8868; unsigned char diag_mask_inf_f32_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, @@ -14530,144 +8028,6 @@ unsigned char f32_to_f16_data[] = { }; const uint64_t f32_to_f16_len = 1596; -unsigned char f32_to_f16_fp32_data[] = { -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0x53,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, -0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, -0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, -0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x0f,0x00,0x09,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x13,0x00,0x00,0x00,0x36,0x00,0x00,0x00,0x42,0x00,0x00,0x00, -0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x0c,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, 
-0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x11,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x11,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x11,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x11,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x11,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x33,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x34,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x34,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x34,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x36,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x36,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x3f,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x40,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x40,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x42,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x42,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x52,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00, -0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00, -0x0a,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x0b,0x00,0x00,0x00,0x01,0x00,0x00,0x00, 
-0x0a,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x0b,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x0d,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x0e,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x1e,0x00,0x06,0x00,0x11,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x12,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x12,0x00,0x00,0x00,0x13,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x15,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x14,0x00,0x02,0x00, -0x23,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x16,0x00,0x03,0x00, -0x32,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, -0x33,0x00,0x00,0x00,0x32,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0x34,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x35,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x34,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x35,0x00,0x00,0x00,0x36,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x38,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x16,0x00,0x03,0x00, -0x3e,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, -0x3f,0x00,0x00,0x00,0x3e,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0x40,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x41,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x41,0x00,0x00,0x00,0x42,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x3e,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x32,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x50,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, 
-0x09,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x2c,0x00,0x06,0x00,0x0a,0x00,0x00,0x00,0x52,0x00,0x00,0x00, -0x50,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x51,0x00,0x00,0x00, -0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x05,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x0e,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x0d,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x15,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x13,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x89,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x1a,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x86,0x00,0x05,0x00, -0x09,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0xb1,0x00,0x05,0x00, -0x23,0x00,0x00,0x00,0x27,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x29,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x27,0x00,0x00,0x00, -0x28,0x00,0x00,0x00,0x29,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x28,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x15,0x00,0x00,0x00, -0x2c,0x00,0x00,0x00,0x13,0x00,0x00,0x00,0x2b,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, -0x2c,0x00,0x00,0x00,0xb1,0x00,0x05,0x00,0x23,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0x29,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x29,0x00,0x00,0x00,0xf5,0x00,0x07,0x00,0x23,0x00,0x00,0x00, -0x2f,0x00,0x00,0x00,0x27,0x00,0x00,0x00,0x05,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x28,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, 
-0x31,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00, -0x2f,0x00,0x00,0x00,0x30,0x00,0x00,0x00,0x31,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x30,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x15,0x00,0x00,0x00,0x39,0x00,0x00,0x00,0x13,0x00,0x00,0x00, -0x38,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x39,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3b,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x3d,0x00,0x00,0x00,0x3b,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x15,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x13,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x46,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x47,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x46,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x49,0x00,0x00,0x00,0x47,0x00,0x00,0x00, -0x1a,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x4a,0x00,0x00,0x00, -0x4b,0x00,0x00,0x00,0x42,0x00,0x00,0x00,0x2b,0x00,0x00,0x00, -0x49,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x3e,0x00,0x00,0x00, -0x4c,0x00,0x00,0x00,0x4b,0x00,0x00,0x00,0x73,0x00,0x04,0x00, -0x32,0x00,0x00,0x00,0x4d,0x00,0x00,0x00,0x4c,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x4e,0x00,0x00,0x00,0x4f,0x00,0x00,0x00, -0x36,0x00,0x00,0x00,0x2b,0x00,0x00,0x00,0x3d,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0x4f,0x00,0x00,0x00,0x4d,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0x31,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x31,0x00,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, - -}; -const uint64_t f32_to_f16_fp32_len = 1596; - unsigned char gelu_f32_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, 0x4b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, @@ -14798,500 +8158,6 @@ const uint64_t gelu_f32_len = 1484; unsigned char get_rows_f16_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0x77,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 
-0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, -0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00,0x0b,0x00,0x06,0x00, -0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c,0x2e,0x73,0x74,0x64, -0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00,0x0e,0x00,0x03,0x00, -0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x0f,0x00,0x0a,0x00, -0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x6d,0x61,0x69,0x6e, -0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x55,0x00,0x00,0x00,0x63,0x00,0x00,0x00, -0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x2a,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x2b,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x2b,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x2d,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x2d,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x52,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x53,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x53,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x53,0x00,0x00,0x00, 
-0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x55,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x55,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x60,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x61,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x61,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x61,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x63,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x63,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x74,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00, -0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x0a,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x0a,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x0d,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x12,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x16,0x00,0x03,0x00,0x1c,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x1e,0x00,0x06,0x00,0x1d,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x1e,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x1d,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x1e,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, 
-0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x14,0x00,0x02,0x00,0x24,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x2a,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x2b,0x00,0x00,0x00,0x2a,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x2c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2b,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x2c,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x30,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x16,0x00,0x03,0x00,0x4e,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x52,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x53,0x00,0x00,0x00, -0x52,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x54,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x54,0x00,0x00,0x00,0x55,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x58,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x60,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x61,0x00,0x00,0x00, -0x60,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x62,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x61,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x62,0x00,0x00,0x00,0x63,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x73,0x00,0x00,0x00, -0x00,0x02,0x00,0x00,0x2c,0x00,0x06,0x00,0x09,0x00,0x00,0x00, -0x74,0x00,0x00,0x00,0x73,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, -0x75,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00, -0x0c,0x00,0x00,0x00,0x76,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x76,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x0d,0x00,0x00,0x00, -0x0e,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, 
-0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, -0x0e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x10,0x00,0x00,0x00,0x13,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x12,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x14,0x00,0x00,0x00,0x13,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x0d,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x21,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x1f,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0xae,0x00,0x05,0x00,0x24,0x00,0x00,0x00, -0x25,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0x27,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfa,0x00,0x04,0x00,0x25,0x00,0x00,0x00,0x26,0x00,0x00,0x00, -0x27,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x26,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0x75,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x27,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x30,0x00,0x00,0x00, -0x31,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x1a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x32,0x00,0x00,0x00,0x31,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x32,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x38,0x00,0x00,0x00, -0x33,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x38,0x00,0x00,0x00, -0x14,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x3f,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x3f,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x86,0x00,0x05,0x00, 
-0x06,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x47,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x48,0x00,0x00,0x00, -0x47,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x89,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x4c,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x82,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x4d,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x4c,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x58,0x00,0x00,0x00,0x59,0x00,0x00,0x00, -0x55,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x44,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0x5a,0x00,0x00,0x00, -0x59,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x5c,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x58,0x00,0x00,0x00,0x5d,0x00,0x00,0x00, -0x55,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x5c,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0x5e,0x00,0x00,0x00, -0x5d,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x66,0x00,0x00,0x00,0x4d,0x00,0x00,0x00,0x48,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x58,0x00,0x00,0x00,0x6b,0x00,0x00,0x00, -0x63,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x66,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0x6b,0x00,0x00,0x00,0x5a,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x6f,0x00,0x00,0x00, -0x66,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x58,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0x63,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x6f,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0x72,0x00,0x00,0x00,0x5e,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0x75,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x75,0x00,0x00,0x00, -0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, -}; -const uint64_t get_rows_f16_len = 1892; - -unsigned char get_rows_f16_f32_data[] = { -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0x7a,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, 
-0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00,0x0b,0x00,0x06,0x00, -0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c,0x2e,0x73,0x74,0x64, -0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00,0x0e,0x00,0x03,0x00, -0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x0f,0x00,0x0a,0x00, -0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x6d,0x61,0x69,0x6e, -0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x55,0x00,0x00,0x00,0x63,0x00,0x00,0x00, -0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x2a,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x2b,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x2b,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x2d,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x2d,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x52,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x53,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x53,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x53,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x55,0x00,0x00,0x00, 
-0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x55,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x60,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x61,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x61,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x61,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x63,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x63,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x77,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00, -0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x0a,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x0a,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x0d,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x12,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x16,0x00,0x03,0x00,0x1c,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x1e,0x00,0x06,0x00,0x1d,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x1e,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x1d,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x1e,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00, 
-0x20,0x00,0x04,0x00,0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x14,0x00,0x02,0x00,0x24,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x2a,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x2b,0x00,0x00,0x00,0x2a,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x2c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2b,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x2c,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x30,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x16,0x00,0x03,0x00,0x4e,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x52,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x53,0x00,0x00,0x00, -0x52,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x54,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x54,0x00,0x00,0x00,0x55,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x58,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x60,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x61,0x00,0x00,0x00, -0x60,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x62,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x61,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x62,0x00,0x00,0x00,0x63,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x6c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x76,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x2c,0x00,0x06,0x00, -0x09,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x76,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x36,0x00,0x05,0x00, -0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0x78,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00,0x79,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x79,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x0d,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, 
-0x0c,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x10,0x00,0x00,0x00,0x13,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x12,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x13,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x0d,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x1a,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x21,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0xae,0x00,0x05,0x00, -0x24,0x00,0x00,0x00,0x25,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x27,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x25,0x00,0x00,0x00, -0x26,0x00,0x00,0x00,0x27,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x26,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x78,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x30,0x00,0x00,0x00,0x31,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x32,0x00,0x00,0x00,0x31,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x33,0x00,0x00,0x00, -0x32,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x38,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x38,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x14,0x00,0x00,0x00, 
-0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x44,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x89,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x47,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x48,0x00,0x00,0x00,0x47,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4c,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x82,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x4d,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x4c,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x58,0x00,0x00,0x00, -0x59,0x00,0x00,0x00,0x55,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0x5a,0x00,0x00,0x00,0x59,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x5c,0x00,0x00,0x00,0x44,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x58,0x00,0x00,0x00, -0x5d,0x00,0x00,0x00,0x55,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x5c,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0x5e,0x00,0x00,0x00,0x5d,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0x4d,0x00,0x00,0x00, -0x48,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, -0x6b,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x6c,0x00,0x00,0x00,0x6d,0x00,0x00,0x00,0x63,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0x6d,0x00,0x00,0x00,0x6b,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x71,0x00,0x00,0x00,0x66,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, -0x74,0x00,0x00,0x00,0x5e,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x6c,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0x63,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x71,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0x75,0x00,0x00,0x00,0x74,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0x78,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x78,0x00,0x00,0x00, -0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, -}; -const uint64_t get_rows_f16_f32_len = 1940; - -unsigned char get_rows_f16_f32_fp32_data[] = 
{ -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0x7a,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, -0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, -0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, -0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x0f,0x00,0x0a,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x55,0x00,0x00,0x00, -0x65,0x00,0x00,0x00,0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x1d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x1d,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x1d,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x2a,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x2b,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x2d,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x2d,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x52,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x53,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x53,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 
-0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x53,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x55,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x55,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x62,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x63,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x63,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x63,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x65,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x65,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x77,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00, -0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x0a,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x0a,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x0d,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x12,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x16,0x00,0x03,0x00,0x1c,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x1e,0x00,0x06,0x00,0x1d,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x1e,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x1d,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, 
-0x1e,0x00,0x00,0x00,0x1f,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x21,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x14,0x00,0x02,0x00, -0x24,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x2a,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x2b,0x00,0x00,0x00, -0x2a,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x2c,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x2b,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x2c,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x30,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x16,0x00,0x03,0x00, -0x51,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, -0x52,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0x53,0x00,0x00,0x00,0x52,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x54,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x53,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x54,0x00,0x00,0x00,0x55,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x58,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, -0x62,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0x63,0x00,0x00,0x00,0x62,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x64,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x63,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x64,0x00,0x00,0x00,0x65,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x6d,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x76,0x00,0x00,0x00,0x00,0x02,0x00,0x00, -0x2c,0x00,0x06,0x00,0x09,0x00,0x00,0x00,0x77,0x00,0x00,0x00, -0x76,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x78,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00, 
-0x79,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x79,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x0d,0x00,0x00,0x00,0x0e,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x0e,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x10,0x00,0x00,0x00, -0x13,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x12,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x13,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x0d,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x21,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0xae,0x00,0x05,0x00,0x24,0x00,0x00,0x00,0x25,0x00,0x00,0x00, -0x14,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, -0x27,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00, -0x25,0x00,0x00,0x00,0x26,0x00,0x00,0x00,0x27,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x26,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0x78,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x27,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x30,0x00,0x00,0x00,0x31,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x32,0x00,0x00,0x00, -0x31,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x33,0x00,0x00,0x00,0x32,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x33,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3f,0x00,0x00,0x00, 
-0x1a,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x3f,0x00,0x00,0x00, -0x14,0x00,0x00,0x00,0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x47,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x86,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x47,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x4c,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x82,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4d,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x4c,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x58,0x00,0x00,0x00,0x59,0x00,0x00,0x00,0x55,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x51,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x59,0x00,0x00,0x00, -0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x5b,0x00,0x00,0x00, -0x5a,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x5d,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x58,0x00,0x00,0x00,0x5e,0x00,0x00,0x00, -0x55,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x5d,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x51,0x00,0x00,0x00,0x5f,0x00,0x00,0x00, -0x5e,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, -0x60,0x00,0x00,0x00,0x5f,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x68,0x00,0x00,0x00,0x4d,0x00,0x00,0x00, -0x48,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x6d,0x00,0x00,0x00, -0x6e,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x68,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0x6e,0x00,0x00,0x00, -0x5b,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x72,0x00,0x00,0x00,0x68,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x6d,0x00,0x00,0x00,0x75,0x00,0x00,0x00, -0x65,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x72,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0x75,0x00,0x00,0x00,0x60,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0x78,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, 
-0x78,0x00,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, - -}; -const uint64_t get_rows_f16_f32_fp32_len = 1932; - -unsigned char get_rows_f16_fp32_data[] = { -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, 0x7b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, 0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, @@ -15455,21 +8321,19 @@ unsigned char get_rows_f16_fp32_data[] = { 0xf8,0x00,0x02,0x00,0x79,0x00,0x00,0x00,0xfd,0x00,0x01,0x00, 0x38,0x00,0x01,0x00, }; -const uint64_t get_rows_f16_fp32_len = 1948; +const uint64_t get_rows_f16_len = 1948; -unsigned char get_rows_q4_0_data[] = { +unsigned char get_rows_f16_f32_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0x97,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, -0x11,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x51,0x11,0x00,0x00,0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00, +0x7a,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, +0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, 0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, 0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, 0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, 0x0f,0x00,0x0a,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, 0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x5a,0x00,0x00,0x00, -0x7d,0x00,0x00,0x00,0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00, +0x1f,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x55,0x00,0x00,0x00, +0x65,0x00,0x00,0x00,0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00, 0x11,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00, 0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00, 0x0b,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00, @@ -15488,297 +8352,75 @@ unsigned char get_rows_q4_0_data[] = { 
0x2b,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, 0x2d,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x47,0x00,0x04,0x00,0x2d,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x55,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x56,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x56,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x57,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x12,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x58,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x58,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x5a,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x5a,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x7a,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x7b,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x7b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x7b,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x7d,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x7d,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x8d,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00, -0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x0a,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x0a,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, 
-0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x0d,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x12,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x16,0x00,0x03,0x00,0x1c,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x1e,0x00,0x06,0x00,0x1d,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x1e,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x1d,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x1e,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x14,0x00,0x02,0x00,0x24,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x2a,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x2b,0x00,0x00,0x00,0x2a,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x2c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2b,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x2c,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x30,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x49,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x16,0x00,0x03,0x00,0x50,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x08,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x54,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x1c,0x00,0x04,0x00, -0x55,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x54,0x00,0x00,0x00, -0x1e,0x00,0x04,0x00,0x56,0x00,0x00,0x00,0x50,0x00,0x00,0x00, 
-0x55,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x57,0x00,0x00,0x00, -0x56,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x58,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x59,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x59,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x5c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x50,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x63,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x17,0x00,0x04,0x00, -0x66,0x00,0x00,0x00,0x50,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x6c,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x70,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x50,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0x00,0x48,0x00,0x00, -0x1d,0x00,0x03,0x00,0x7a,0x00,0x00,0x00,0x50,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x7b,0x00,0x00,0x00,0x7a,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x7c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x7b,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x7c,0x00,0x00,0x00, -0x7d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x8c,0x00,0x00,0x00,0x00,0x02,0x00,0x00, -0x2c,0x00,0x06,0x00,0x09,0x00,0x00,0x00,0x8d,0x00,0x00,0x00, -0x8c,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x2c,0x00,0x05,0x00,0x66,0x00,0x00,0x00,0x96,0x00,0x00,0x00, -0x75,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0x36,0x00,0x05,0x00, -0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0x8e,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00,0x8f,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x8f,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x0d,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, 
-0x84,0x00,0x05,0x00,0x10,0x00,0x00,0x00,0x13,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x12,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x13,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x0d,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x1a,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x21,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0xae,0x00,0x05,0x00, -0x24,0x00,0x00,0x00,0x25,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x27,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x25,0x00,0x00,0x00, -0x26,0x00,0x00,0x00,0x27,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x26,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x8e,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x30,0x00,0x00,0x00,0x31,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x32,0x00,0x00,0x00,0x31,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x33,0x00,0x00,0x00, -0x32,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x38,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x38,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x89,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, 
-0x44,0x00,0x00,0x00,0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x49,0x00,0x00,0x00, -0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4e,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x82,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x41,0x00,0x07,0x00,0x5c,0x00,0x00,0x00, -0x5d,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x50,0x00,0x00,0x00,0x5e,0x00,0x00,0x00,0x5d,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x63,0x00,0x00,0x00,0x64,0x00,0x00,0x00, -0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x53,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x64,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x6a,0x00,0x00,0x00, -0x65,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x6b,0x00,0x00,0x00,0x6a,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x10,0x00,0x00,0x00,0x6d,0x00,0x00,0x00,0x6b,0x00,0x00,0x00, -0x6c,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x50,0x00,0x00,0x00, -0x6e,0x00,0x00,0x00,0x6d,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x53,0x00,0x00,0x00,0x71,0x00,0x00,0x00,0x65,0x00,0x00,0x00, -0x70,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x50,0x00,0x00,0x00, -0x72,0x00,0x00,0x00,0x71,0x00,0x00,0x00,0x50,0x00,0x05,0x00, -0x66,0x00,0x00,0x00,0x73,0x00,0x00,0x00,0x6e,0x00,0x00,0x00, -0x72,0x00,0x00,0x00,0x83,0x00,0x05,0x00,0x66,0x00,0x00,0x00, -0x77,0x00,0x00,0x00,0x73,0x00,0x00,0x00,0x96,0x00,0x00,0x00, -0x8e,0x00,0x05,0x00,0x66,0x00,0x00,0x00,0x79,0x00,0x00,0x00, -0x77,0x00,0x00,0x00,0x5e,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x4f,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x50,0x00,0x00,0x00, -0x83,0x00,0x00,0x00,0x79,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x5c,0x00,0x00,0x00,0x84,0x00,0x00,0x00, -0x7d,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x80,0x00,0x00,0x00, 
-0x3e,0x00,0x03,0x00,0x84,0x00,0x00,0x00,0x83,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x88,0x00,0x00,0x00, -0x80,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x51,0x00,0x05,0x00, -0x50,0x00,0x00,0x00,0x8a,0x00,0x00,0x00,0x79,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x5c,0x00,0x00,0x00, -0x8b,0x00,0x00,0x00,0x7d,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x88,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0x8b,0x00,0x00,0x00, -0x8a,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x8e,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x8e,0x00,0x00,0x00,0xfd,0x00,0x01,0x00, -0x38,0x00,0x01,0x00, -}; -const uint64_t get_rows_q4_0_len = 2356; - -unsigned char get_rows_q4_0_f32_data[] = { -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0x9a,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, -0x11,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x51,0x11,0x00,0x00,0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00, -0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, -0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, -0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x0f,0x00,0x0a,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x5a,0x00,0x00,0x00, -0x7d,0x00,0x00,0x00,0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x1d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x1d,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00, 
-0x0c,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x1d,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x2a,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x52,0x00,0x00,0x00, +0x06,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00, +0x53,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, +0x48,0x00,0x05,0x00,0x53,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x2b,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x2d,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x2d,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x55,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x56,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x56,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x57,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x12,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x58,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x58,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x5a,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x5a,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x7a,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x7b,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x7b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x7b,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x7d,0x00,0x00,0x00, 
-0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x7d,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x90,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00, -0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x0a,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x0a,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x0d,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x12,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x16,0x00,0x03,0x00,0x1c,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x1e,0x00,0x06,0x00,0x1d,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x1e,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x1d,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x1e,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x14,0x00,0x02,0x00,0x24,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x2a,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x2b,0x00,0x00,0x00,0x2a,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x2c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2b,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x2c,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 
-0x20,0x00,0x04,0x00,0x30,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x49,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x16,0x00,0x03,0x00,0x50,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x08,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x54,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x1c,0x00,0x04,0x00, -0x55,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x54,0x00,0x00,0x00, -0x1e,0x00,0x04,0x00,0x56,0x00,0x00,0x00,0x50,0x00,0x00,0x00, -0x55,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x57,0x00,0x00,0x00, -0x56,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x58,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x59,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x59,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x5c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x50,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x63,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x17,0x00,0x04,0x00, -0x66,0x00,0x00,0x00,0x50,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x6c,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x70,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x50,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0x00,0x48,0x00,0x00, -0x1d,0x00,0x03,0x00,0x7a,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x7b,0x00,0x00,0x00,0x7a,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x7c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x7b,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x7c,0x00,0x00,0x00, -0x7d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x85,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x8f,0x00,0x00,0x00, -0x00,0x02,0x00,0x00,0x2c,0x00,0x06,0x00,0x09,0x00,0x00,0x00, -0x90,0x00,0x00,0x00,0x8f,0x00,0x00,0x00,0x16,0x00,0x00,0x00, 
-0x16,0x00,0x00,0x00,0x2c,0x00,0x05,0x00,0x66,0x00,0x00,0x00, -0x99,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0x75,0x00,0x00,0x00, +0x53,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, +0x55,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x55,0x00,0x00,0x00,0x21,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x62,0x00,0x00,0x00, +0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00, +0x63,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00, +0x48,0x00,0x05,0x00,0x63,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, +0x63,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, +0x65,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x65,0x00,0x00,0x00,0x21,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x77,0x00,0x00,0x00, +0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00, +0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00, +0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x03,0x00,0x00,0x00, +0x20,0x00,0x04,0x00,0x0a,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x09,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x0a,0x00,0x00,0x00, +0x0b,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, +0x06,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x20,0x00,0x04,0x00,0x0d,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x06,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x10,0x00,0x00,0x00, +0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, +0x10,0x00,0x00,0x00,0x12,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x01,0x00,0x00,0x00,0x16,0x00,0x03,0x00,0x1c,0x00,0x00,0x00, +0x20,0x00,0x00,0x00,0x1e,0x00,0x06,0x00,0x1d,0x00,0x00,0x00, +0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, +0x1c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x1e,0x00,0x00,0x00, 
+0x09,0x00,0x00,0x00,0x1d,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, +0x1e,0x00,0x00,0x00,0x1f,0x00,0x00,0x00,0x09,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00, +0x01,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x21,0x00,0x00,0x00, +0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x14,0x00,0x02,0x00, +0x24,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x2a,0x00,0x00,0x00, +0x10,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x2b,0x00,0x00,0x00, +0x2a,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x2c,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x2b,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, +0x2c,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x30,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x16,0x00,0x03,0x00, +0x51,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, +0x52,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, +0x53,0x00,0x00,0x00,0x52,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x54,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x53,0x00,0x00,0x00, +0x3b,0x00,0x04,0x00,0x54,0x00,0x00,0x00,0x55,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x58,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, +0x62,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, +0x63,0x00,0x00,0x00,0x62,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x64,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x63,0x00,0x00,0x00, +0x3b,0x00,0x04,0x00,0x64,0x00,0x00,0x00,0x65,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x6d,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, +0x06,0x00,0x00,0x00,0x76,0x00,0x00,0x00,0x00,0x02,0x00,0x00, +0x2c,0x00,0x06,0x00,0x09,0x00,0x00,0x00,0x77,0x00,0x00,0x00, +0x76,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x16,0x00,0x00,0x00, 0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x91,0x00,0x00,0x00, 
+0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x78,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00, -0x92,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x92,0x00,0x00,0x00, +0x79,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x79,0x00,0x00,0x00, 0x41,0x00,0x05,0x00,0x0d,0x00,0x00,0x00,0x0e,0x00,0x00,0x00, 0x0b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, 0x06,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x0e,0x00,0x00,0x00, @@ -15800,7 +8442,7 @@ unsigned char get_rows_q4_0_f32_data[] = { 0x27,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00, 0x25,0x00,0x00,0x00,0x26,0x00,0x00,0x00,0x27,0x00,0x00,0x00, 0xf8,0x00,0x02,0x00,0x26,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0x91,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x27,0x00,0x00,0x00, +0x78,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x27,0x00,0x00,0x00, 0x41,0x00,0x06,0x00,0x30,0x00,0x00,0x00,0x31,0x00,0x00,0x00, 0x2d,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, 0x3d,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x32,0x00,0x00,0x00, @@ -15813,258 +8455,41 @@ unsigned char get_rows_q4_0_f32_data[] = { 0x1a,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x80,0x00,0x05,0x00, 0x06,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x3f,0x00,0x00,0x00, 0x14,0x00,0x00,0x00,0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x44,0x00,0x00,0x00, -0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x48,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x86,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0x48,0x00,0x00,0x00, -0x49,0x00,0x00,0x00,0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x44,0x00,0x00,0x00, -0x82,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4f,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x4e,0x00,0x00,0x00,0x41,0x00,0x07,0x00, -0x5c,0x00,0x00,0x00,0x5d,0x00,0x00,0x00,0x5a,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x50,0x00,0x00,0x00,0x5e,0x00,0x00,0x00, -0x5d,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x63,0x00,0x00,0x00, 
-0x64,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x65,0x00,0x00,0x00, -0x64,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x6a,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x6b,0x00,0x00,0x00,0x6a,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x10,0x00,0x00,0x00,0x6d,0x00,0x00,0x00, -0x6b,0x00,0x00,0x00,0x6c,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x50,0x00,0x00,0x00,0x6e,0x00,0x00,0x00,0x6d,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x53,0x00,0x00,0x00,0x71,0x00,0x00,0x00, -0x65,0x00,0x00,0x00,0x70,0x00,0x00,0x00,0x70,0x00,0x04,0x00, -0x50,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0x71,0x00,0x00,0x00, -0x50,0x00,0x05,0x00,0x66,0x00,0x00,0x00,0x73,0x00,0x00,0x00, -0x6e,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0x83,0x00,0x05,0x00, -0x66,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x73,0x00,0x00,0x00, -0x99,0x00,0x00,0x00,0x8e,0x00,0x05,0x00,0x66,0x00,0x00,0x00, -0x79,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x5e,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x80,0x00,0x00,0x00, -0x4f,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0x51,0x00,0x05,0x00, -0x50,0x00,0x00,0x00,0x83,0x00,0x00,0x00,0x79,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, -0x84,0x00,0x00,0x00,0x83,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x85,0x00,0x00,0x00,0x86,0x00,0x00,0x00,0x7d,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0x86,0x00,0x00,0x00,0x84,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x8a,0x00,0x00,0x00,0x80,0x00,0x00,0x00, -0x54,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x50,0x00,0x00,0x00, -0x8c,0x00,0x00,0x00,0x79,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x8d,0x00,0x00,0x00, -0x8c,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x85,0x00,0x00,0x00, -0x8e,0x00,0x00,0x00,0x7d,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x8a,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0x8e,0x00,0x00,0x00, 
-0x8d,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x91,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x91,0x00,0x00,0x00,0xfd,0x00,0x01,0x00, -0x38,0x00,0x01,0x00, -}; -const uint64_t get_rows_q4_0_f32_len = 2404; +0x44,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x47,0x00,0x00,0x00, +0x3a,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x86,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x47,0x00,0x00,0x00, +0x16,0x00,0x00,0x00,0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x4c,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x82,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4d,0x00,0x00,0x00, +0x41,0x00,0x00,0x00,0x4c,0x00,0x00,0x00,0x41,0x00,0x06,0x00, +0x58,0x00,0x00,0x00,0x59,0x00,0x00,0x00,0x55,0x00,0x00,0x00, +0x2e,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, +0x51,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x59,0x00,0x00,0x00, +0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x5b,0x00,0x00,0x00, +0x5a,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x5d,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x41,0x00,0x06,0x00,0x58,0x00,0x00,0x00,0x5e,0x00,0x00,0x00, +0x55,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x5d,0x00,0x00,0x00, +0x3d,0x00,0x04,0x00,0x51,0x00,0x00,0x00,0x5f,0x00,0x00,0x00, +0x5e,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, +0x60,0x00,0x00,0x00,0x5f,0x00,0x00,0x00,0x80,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x68,0x00,0x00,0x00,0x4d,0x00,0x00,0x00, +0x48,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x6d,0x00,0x00,0x00, +0x6e,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x68,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0x6e,0x00,0x00,0x00, +0x5b,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x72,0x00,0x00,0x00,0x68,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x41,0x00,0x06,0x00,0x6d,0x00,0x00,0x00,0x75,0x00,0x00,0x00, +0x65,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x72,0x00,0x00,0x00, +0x3e,0x00,0x03,0x00,0x75,0x00,0x00,0x00,0x60,0x00,0x00,0x00, +0xf9,0x00,0x02,0x00,0x78,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, 
+0x78,0x00,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, -unsigned char get_rows_q4_0_f32_fp32_data[] = { -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0x97,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, -0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00,0x0b,0x00,0x06,0x00, -0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c,0x2e,0x73,0x74,0x64, -0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00,0x0e,0x00,0x03,0x00, -0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x0f,0x00,0x0a,0x00, -0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x6d,0x61,0x69,0x6e, -0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x7c,0x00,0x00,0x00, -0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x2a,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x2b,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x2b,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x2d,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x2d,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x55,0x00,0x00,0x00,0x06,0x00,0x00,0x00, 
-0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x56,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x56,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x57,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x12,0x00,0x00,0x00, -0x48,0x00,0x04,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x58,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x58,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x5a,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x5a,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x79,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x48,0x00,0x04,0x00,0x7a,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x7a,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x7a,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x7c,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x7c,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x8d,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00, -0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x17,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x0a,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x0a,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x0d,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x15,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00, 
-0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x12,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x16,0x00,0x03,0x00, -0x1c,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x1e,0x00,0x06,0x00, -0x1d,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x1e,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x1d,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x1e,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x14,0x00,0x02,0x00,0x24,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, -0x2a,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0x2b,0x00,0x00,0x00,0x2a,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x2c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x2c,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x30,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x44,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x49,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x16,0x00,0x03,0x00, -0x52,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x15,0x00,0x04,0x00, -0x53,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x54,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x1c,0x00,0x04,0x00,0x55,0x00,0x00,0x00, -0x53,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x1e,0x00,0x04,0x00, -0x56,0x00,0x00,0x00,0x52,0x00,0x00,0x00,0x55,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x57,0x00,0x00,0x00,0x56,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x58,0x00,0x00,0x00,0x57,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x59,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x58,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x59,0x00,0x00,0x00, 
-0x5a,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x5c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x52,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x63,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x53,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x67,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x6b,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x6f,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, -0x74,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x1d,0x00,0x03,0x00, -0x79,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0x7a,0x00,0x00,0x00,0x79,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x7b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x7a,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x7b,0x00,0x00,0x00,0x7c,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x83,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x8c,0x00,0x00,0x00,0x00,0x02,0x00,0x00, -0x2c,0x00,0x06,0x00,0x09,0x00,0x00,0x00,0x8d,0x00,0x00,0x00, -0x8c,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x2c,0x00,0x05,0x00,0x67,0x00,0x00,0x00,0x96,0x00,0x00,0x00, -0x74,0x00,0x00,0x00,0x74,0x00,0x00,0x00,0x36,0x00,0x05,0x00, -0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0x8e,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00,0x8f,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x8f,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x0d,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x10,0x00,0x00,0x00,0x13,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x12,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x13,0x00,0x00,0x00, 
-0x41,0x00,0x05,0x00,0x0d,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x1a,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x21,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0xae,0x00,0x05,0x00, -0x24,0x00,0x00,0x00,0x25,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x27,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x25,0x00,0x00,0x00, -0x26,0x00,0x00,0x00,0x27,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x26,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x8e,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x30,0x00,0x00,0x00,0x31,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x32,0x00,0x00,0x00,0x31,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x33,0x00,0x00,0x00, -0x32,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x38,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x38,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x89,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x49,0x00,0x00,0x00, -0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4e,0x00,0x00,0x00, 
-0x41,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x82,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x41,0x00,0x07,0x00,0x5c,0x00,0x00,0x00, -0x5d,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x52,0x00,0x00,0x00,0x5e,0x00,0x00,0x00,0x5d,0x00,0x00,0x00, -0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x5f,0x00,0x00,0x00, -0x5e,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x63,0x00,0x00,0x00, -0x64,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x65,0x00,0x00,0x00, -0x64,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x66,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x6c,0x00,0x00,0x00,0x66,0x00,0x00,0x00, -0x6b,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, -0x6d,0x00,0x00,0x00,0x6c,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x70,0x00,0x00,0x00,0x66,0x00,0x00,0x00, -0x6f,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, -0x71,0x00,0x00,0x00,0x70,0x00,0x00,0x00,0x50,0x00,0x05,0x00, -0x67,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0x6d,0x00,0x00,0x00, -0x71,0x00,0x00,0x00,0x83,0x00,0x05,0x00,0x67,0x00,0x00,0x00, -0x76,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0x96,0x00,0x00,0x00, -0x8e,0x00,0x05,0x00,0x67,0x00,0x00,0x00,0x78,0x00,0x00,0x00, -0x76,0x00,0x00,0x00,0x5f,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x7f,0x00,0x00,0x00,0x4f,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x1c,0x00,0x00,0x00, -0x82,0x00,0x00,0x00,0x78,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x83,0x00,0x00,0x00,0x84,0x00,0x00,0x00, -0x7c,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x7f,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0x84,0x00,0x00,0x00,0x82,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x88,0x00,0x00,0x00, -0x7f,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x51,0x00,0x05,0x00, 
-0x1c,0x00,0x00,0x00,0x8a,0x00,0x00,0x00,0x78,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x83,0x00,0x00,0x00, -0x8b,0x00,0x00,0x00,0x7c,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x88,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0x8b,0x00,0x00,0x00, -0x8a,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x8e,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x8e,0x00,0x00,0x00,0xfd,0x00,0x01,0x00, -0x38,0x00,0x01,0x00, }; -const uint64_t get_rows_q4_0_f32_fp32_len = 2356; +const uint64_t get_rows_f16_f32_len = 1932; -unsigned char get_rows_q4_0_fp32_data[] = { +unsigned char get_rows_q4_0_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, 0x98,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, @@ -16264,425 +8689,11 @@ unsigned char get_rows_q4_0_fp32_data[] = { 0x8f,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x8f,0x00,0x00,0x00, 0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, }; -const uint64_t get_rows_q4_0_fp32_len = 2372; +const uint64_t get_rows_q4_0_len = 2372; -unsigned char get_rows_q4_1_data[] = { -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0x94,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, -0x11,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x51,0x11,0x00,0x00,0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00, -0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, -0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, -0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x0f,0x00,0x0a,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x5a,0x00,0x00,0x00, -0x81,0x00,0x00,0x00,0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00, 
-0x1d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x1d,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x1d,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x2a,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x2b,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x2d,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x2d,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x55,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x56,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x56,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x56,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x57,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x48,0x00,0x04,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x58,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x58,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x5a,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x5a,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x7e,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x48,0x00,0x04,0x00,0x7f,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 
-0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x7f,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x7f,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x81,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x81,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x91,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00, -0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x17,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x0a,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x0a,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x0d,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x15,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x12,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x16,0x00,0x03,0x00, -0x1c,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x1e,0x00,0x06,0x00, -0x1d,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x1e,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x1d,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x1e,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x14,0x00,0x02,0x00,0x24,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, -0x2a,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0x2b,0x00,0x00,0x00,0x2a,0x00,0x00,0x00,0x20,0x00,0x04,0x00, 
-0x2c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x2c,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x30,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x44,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x49,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x16,0x00,0x03,0x00, -0x50,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x15,0x00,0x04,0x00, -0x53,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x54,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x1c,0x00,0x04,0x00,0x55,0x00,0x00,0x00, -0x53,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x1e,0x00,0x05,0x00, -0x56,0x00,0x00,0x00,0x50,0x00,0x00,0x00,0x50,0x00,0x00,0x00, -0x55,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x57,0x00,0x00,0x00, -0x56,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x58,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x59,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x59,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x5c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x50,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x67,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x17,0x00,0x04,0x00, -0x6a,0x00,0x00,0x00,0x50,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x70,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x74,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, -0x7e,0x00,0x00,0x00,0x50,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0x7f,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x80,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x7f,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x80,0x00,0x00,0x00,0x81,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x90,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x2c,0x00,0x06,0x00, 
-0x09,0x00,0x00,0x00,0x91,0x00,0x00,0x00,0x90,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x36,0x00,0x05,0x00, -0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0x92,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00,0x93,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x93,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x0d,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x10,0x00,0x00,0x00,0x13,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x12,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x13,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x0d,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x1a,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x21,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0xae,0x00,0x05,0x00, -0x24,0x00,0x00,0x00,0x25,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x27,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x25,0x00,0x00,0x00, -0x26,0x00,0x00,0x00,0x27,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x26,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x92,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x30,0x00,0x00,0x00,0x31,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x32,0x00,0x00,0x00,0x31,0x00,0x00,0x00, 
-0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x33,0x00,0x00,0x00, -0x32,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x38,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x38,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x89,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x49,0x00,0x00,0x00, -0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4e,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x82,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x41,0x00,0x07,0x00,0x5c,0x00,0x00,0x00, -0x5d,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x50,0x00,0x00,0x00,0x5e,0x00,0x00,0x00,0x5d,0x00,0x00,0x00, -0x41,0x00,0x07,0x00,0x5c,0x00,0x00,0x00,0x61,0x00,0x00,0x00, -0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x50,0x00,0x00,0x00, -0x62,0x00,0x00,0x00,0x61,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x67,0x00,0x00,0x00,0x68,0x00,0x00,0x00,0x5a,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00,0x12,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x53,0x00,0x00,0x00, -0x69,0x00,0x00,0x00,0x68,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x6e,0x00,0x00,0x00,0x69,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x6f,0x00,0x00,0x00, -0x6e,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x10,0x00,0x00,0x00, -0x71,0x00,0x00,0x00,0x6f,0x00,0x00,0x00,0x70,0x00,0x00,0x00, 
-0x6f,0x00,0x04,0x00,0x50,0x00,0x00,0x00,0x72,0x00,0x00,0x00, -0x71,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x53,0x00,0x00,0x00, -0x75,0x00,0x00,0x00,0x69,0x00,0x00,0x00,0x74,0x00,0x00,0x00, -0x70,0x00,0x04,0x00,0x50,0x00,0x00,0x00,0x76,0x00,0x00,0x00, -0x75,0x00,0x00,0x00,0x50,0x00,0x05,0x00,0x6a,0x00,0x00,0x00, -0x77,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0x76,0x00,0x00,0x00, -0x8e,0x00,0x05,0x00,0x6a,0x00,0x00,0x00,0x7a,0x00,0x00,0x00, -0x77,0x00,0x00,0x00,0x5e,0x00,0x00,0x00,0x50,0x00,0x05,0x00, -0x6a,0x00,0x00,0x00,0x7c,0x00,0x00,0x00,0x62,0x00,0x00,0x00, -0x62,0x00,0x00,0x00,0x81,0x00,0x05,0x00,0x6a,0x00,0x00,0x00, -0x7d,0x00,0x00,0x00,0x7a,0x00,0x00,0x00,0x7c,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x84,0x00,0x00,0x00, -0x4f,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0x51,0x00,0x05,0x00, -0x50,0x00,0x00,0x00,0x87,0x00,0x00,0x00,0x7d,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x5c,0x00,0x00,0x00, -0x88,0x00,0x00,0x00,0x81,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x84,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0x88,0x00,0x00,0x00, -0x87,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x8c,0x00,0x00,0x00,0x84,0x00,0x00,0x00,0x54,0x00,0x00,0x00, -0x51,0x00,0x05,0x00,0x50,0x00,0x00,0x00,0x8e,0x00,0x00,0x00, -0x7d,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x5c,0x00,0x00,0x00,0x8f,0x00,0x00,0x00,0x81,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x8c,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0x8f,0x00,0x00,0x00,0x8e,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0x92,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x92,0x00,0x00,0x00, -0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, -}; -const uint64_t get_rows_q4_1_len = 2408; - -unsigned char get_rows_q4_1_f32_data[] = { +unsigned char get_rows_q4_0_f32_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, 0x97,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, -0x11,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 
-0x51,0x11,0x00,0x00,0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00, -0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, -0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, -0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x0f,0x00,0x0a,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x5a,0x00,0x00,0x00, -0x81,0x00,0x00,0x00,0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x1d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x1d,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x1d,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x2a,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x2b,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x2d,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x2d,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x55,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x56,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x56,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x56,0x00,0x00,0x00,0x02,0x00,0x00,0x00, 
-0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x57,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x48,0x00,0x04,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x58,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x58,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x5a,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x5a,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x7e,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x48,0x00,0x04,0x00,0x7f,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x7f,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x7f,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x81,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x81,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x94,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00, -0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x17,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x0a,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x0a,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x0d,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x15,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x12,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x16,0x00,0x03,0x00, 
-0x1c,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x1e,0x00,0x06,0x00, -0x1d,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x1e,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x1d,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x1e,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x14,0x00,0x02,0x00,0x24,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, -0x2a,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0x2b,0x00,0x00,0x00,0x2a,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x2c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x2c,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x30,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x44,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x49,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x16,0x00,0x03,0x00, -0x50,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x15,0x00,0x04,0x00, -0x53,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x54,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x1c,0x00,0x04,0x00,0x55,0x00,0x00,0x00, -0x53,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x1e,0x00,0x05,0x00, -0x56,0x00,0x00,0x00,0x50,0x00,0x00,0x00,0x50,0x00,0x00,0x00, -0x55,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x57,0x00,0x00,0x00, -0x56,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x58,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x59,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x59,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x5c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x50,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x67,0x00,0x00,0x00, 
-0x0c,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x17,0x00,0x04,0x00, -0x6a,0x00,0x00,0x00,0x50,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x70,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x74,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, -0x7e,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0x7f,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x80,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x7f,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x80,0x00,0x00,0x00,0x81,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x89,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x93,0x00,0x00,0x00,0x00,0x02,0x00,0x00, -0x2c,0x00,0x06,0x00,0x09,0x00,0x00,0x00,0x94,0x00,0x00,0x00, -0x93,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x95,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00, -0x96,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x96,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x0d,0x00,0x00,0x00,0x0e,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x0e,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x10,0x00,0x00,0x00, -0x13,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x12,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x13,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x0d,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x19,0x00,0x00,0x00, 
-0x41,0x00,0x05,0x00,0x21,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0xae,0x00,0x05,0x00,0x24,0x00,0x00,0x00,0x25,0x00,0x00,0x00, -0x14,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, -0x27,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00, -0x25,0x00,0x00,0x00,0x26,0x00,0x00,0x00,0x27,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x26,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0x95,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x27,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x30,0x00,0x00,0x00,0x31,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x32,0x00,0x00,0x00, -0x31,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x33,0x00,0x00,0x00,0x32,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x33,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3f,0x00,0x00,0x00, -0x1a,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x3f,0x00,0x00,0x00, -0x14,0x00,0x00,0x00,0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x44,0x00,0x00,0x00, -0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x48,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x86,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0x48,0x00,0x00,0x00, -0x49,0x00,0x00,0x00,0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x44,0x00,0x00,0x00, -0x82,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4f,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x4e,0x00,0x00,0x00,0x41,0x00,0x07,0x00, -0x5c,0x00,0x00,0x00,0x5d,0x00,0x00,0x00,0x5a,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x50,0x00,0x00,0x00,0x5e,0x00,0x00,0x00, 
-0x5d,0x00,0x00,0x00,0x41,0x00,0x07,0x00,0x5c,0x00,0x00,0x00, -0x61,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x50,0x00,0x00,0x00,0x62,0x00,0x00,0x00,0x61,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x67,0x00,0x00,0x00,0x68,0x00,0x00,0x00, -0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x12,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x53,0x00,0x00,0x00,0x69,0x00,0x00,0x00,0x68,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x6e,0x00,0x00,0x00, -0x69,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x6f,0x00,0x00,0x00,0x6e,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x10,0x00,0x00,0x00,0x71,0x00,0x00,0x00,0x6f,0x00,0x00,0x00, -0x70,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x50,0x00,0x00,0x00, -0x72,0x00,0x00,0x00,0x71,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x53,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0x69,0x00,0x00,0x00, -0x74,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x50,0x00,0x00,0x00, -0x76,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0x50,0x00,0x05,0x00, -0x6a,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x72,0x00,0x00,0x00, -0x76,0x00,0x00,0x00,0x8e,0x00,0x05,0x00,0x6a,0x00,0x00,0x00, -0x7a,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x5e,0x00,0x00,0x00, -0x50,0x00,0x05,0x00,0x6a,0x00,0x00,0x00,0x7c,0x00,0x00,0x00, -0x62,0x00,0x00,0x00,0x62,0x00,0x00,0x00,0x81,0x00,0x05,0x00, -0x6a,0x00,0x00,0x00,0x7d,0x00,0x00,0x00,0x7a,0x00,0x00,0x00, -0x7c,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x84,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0x51,0x00,0x05,0x00,0x50,0x00,0x00,0x00,0x87,0x00,0x00,0x00, -0x7d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x73,0x00,0x04,0x00, -0x1c,0x00,0x00,0x00,0x88,0x00,0x00,0x00,0x87,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x89,0x00,0x00,0x00,0x8a,0x00,0x00,0x00, -0x81,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x84,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0x8a,0x00,0x00,0x00,0x88,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x8e,0x00,0x00,0x00, 
-0x84,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x51,0x00,0x05,0x00, -0x50,0x00,0x00,0x00,0x90,0x00,0x00,0x00,0x7d,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, -0x91,0x00,0x00,0x00,0x90,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x89,0x00,0x00,0x00,0x92,0x00,0x00,0x00,0x81,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x8e,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0x92,0x00,0x00,0x00,0x91,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0x95,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x95,0x00,0x00,0x00, -0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, -}; -const uint64_t get_rows_q4_1_f32_len = 2456; - -unsigned char get_rows_q4_1_f32_fp32_data[] = { -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0x95,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, 0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00,0x0b,0x00,0x06,0x00, 0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c,0x2e,0x73,0x74,0x64, @@ -16690,7 +8701,7 @@ unsigned char get_rows_q4_1_f32_fp32_data[] = { 0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x0f,0x00,0x0a,0x00, 0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x6d,0x61,0x69,0x6e, 0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x81,0x00,0x00,0x00, +0x2d,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x7c,0x00,0x00,0x00, 0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00,0x11,0x00,0x00,0x00, 0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00, 0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, @@ -16714,91 +8725,92 @@ unsigned char get_rows_q4_1_f32_fp32_data[] = { 0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x56,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x48,0x00,0x05,0x00,0x56,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x56,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x57,0x00,0x00,0x00, 
-0x06,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x58,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x5a,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x5a,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x7e,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x7f,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x7f,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x7f,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x81,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x81,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x92,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00, -0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x0a,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x0a,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x0d,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x12,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x16,0x00,0x03,0x00,0x1c,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x1e,0x00,0x06,0x00,0x1d,0x00,0x00,0x00, 
-0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x1e,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x1d,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x1e,0x00,0x00,0x00,0x1f,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x21,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x14,0x00,0x02,0x00, -0x24,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x2a,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x2b,0x00,0x00,0x00, -0x2a,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x2c,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x2b,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x2c,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x30,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x49,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x16,0x00,0x03,0x00,0x52,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x53,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x1c,0x00,0x04,0x00,0x55,0x00,0x00,0x00,0x53,0x00,0x00,0x00, -0x54,0x00,0x00,0x00,0x1e,0x00,0x05,0x00,0x56,0x00,0x00,0x00, -0x52,0x00,0x00,0x00,0x52,0x00,0x00,0x00,0x55,0x00,0x00,0x00, +0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, +0x57,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x12,0x00,0x00,0x00, +0x48,0x00,0x04,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x58,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x47,0x00,0x03,0x00,0x58,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x5a,0x00,0x00,0x00,0x22,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x5a,0x00,0x00,0x00, 
+0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, +0x79,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00, +0x48,0x00,0x04,0x00,0x7a,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x7a,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x47,0x00,0x03,0x00,0x7a,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x7c,0x00,0x00,0x00,0x22,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x7c,0x00,0x00,0x00, +0x21,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, +0x8d,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00, +0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00, +0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00, +0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x17,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00, +0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x0a,0x00,0x00,0x00, +0x01,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, +0x0a,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x0d,0x00,0x00,0x00, +0x01,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x15,0x00,0x04,0x00, +0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x12,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x16,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x16,0x00,0x03,0x00, +0x1c,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x1e,0x00,0x06,0x00, +0x1d,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00, +0x1c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x1e,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x1d,0x00,0x00,0x00, +0x3b,0x00,0x04,0x00,0x1e,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, +0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, +0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00, 
+0x14,0x00,0x02,0x00,0x24,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, +0x2a,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, +0x2b,0x00,0x00,0x00,0x2a,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x2c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x00,0x00, +0x3b,0x00,0x04,0x00,0x2c,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, +0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x30,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x10,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x44,0x00,0x00,0x00, +0x20,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x49,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x16,0x00,0x03,0x00, +0x52,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x15,0x00,0x04,0x00, +0x53,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x54,0x00,0x00,0x00, +0x10,0x00,0x00,0x00,0x1c,0x00,0x04,0x00,0x55,0x00,0x00,0x00, +0x53,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x1e,0x00,0x04,0x00, +0x56,0x00,0x00,0x00,0x52,0x00,0x00,0x00,0x55,0x00,0x00,0x00, 0x1d,0x00,0x03,0x00,0x57,0x00,0x00,0x00,0x56,0x00,0x00,0x00, 0x1e,0x00,0x03,0x00,0x58,0x00,0x00,0x00,0x57,0x00,0x00,0x00, 0x20,0x00,0x04,0x00,0x59,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, 0x58,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x59,0x00,0x00,0x00, 0x5a,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, 0x5c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x52,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x68,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x53,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x6c,0x00,0x00,0x00, +0x20,0x00,0x04,0x00,0x63,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x53,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x67,0x00,0x00,0x00, 0x1c,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x70,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x74,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x7e,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x7f,0x00,0x00,0x00, 
-0x7e,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x80,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x7f,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x80,0x00,0x00,0x00,0x81,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x88,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x91,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x2c,0x00,0x06,0x00, -0x09,0x00,0x00,0x00,0x92,0x00,0x00,0x00,0x91,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x36,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x6b,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x6f,0x00,0x00,0x00, +0x04,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, +0x74,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x1d,0x00,0x03,0x00, +0x79,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, +0x7a,0x00,0x00,0x00,0x79,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x7b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x7a,0x00,0x00,0x00, +0x3b,0x00,0x04,0x00,0x7b,0x00,0x00,0x00,0x7c,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x83,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, +0x06,0x00,0x00,0x00,0x8c,0x00,0x00,0x00,0x00,0x02,0x00,0x00, +0x2c,0x00,0x06,0x00,0x09,0x00,0x00,0x00,0x8d,0x00,0x00,0x00, +0x8c,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x2c,0x00,0x05,0x00,0x67,0x00,0x00,0x00,0x96,0x00,0x00,0x00, +0x74,0x00,0x00,0x00,0x74,0x00,0x00,0x00,0x36,0x00,0x05,0x00, 0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0x93,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00,0x94,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x94,0x00,0x00,0x00,0x41,0x00,0x05,0x00, +0xf7,0x00,0x03,0x00,0x8e,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00,0x8f,0x00,0x00,0x00, +0xf8,0x00,0x02,0x00,0x8f,0x00,0x00,0x00,0x41,0x00,0x05,0x00, 0x0d,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, 
0x0c,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, 0x0f,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, @@ -16819,7 +8831,7 @@ unsigned char get_rows_q4_1_f32_fp32_data[] = { 0x23,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x27,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x25,0x00,0x00,0x00, 0x26,0x00,0x00,0x00,0x27,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x26,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x93,0x00,0x00,0x00, +0x26,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x8e,0x00,0x00,0x00, 0xf8,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x41,0x00,0x06,0x00, 0x30,0x00,0x00,0x00,0x31,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, 0x2e,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, @@ -16845,49 +8857,42 @@ unsigned char get_rows_q4_1_f32_fp32_data[] = { 0x45,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, 0x52,0x00,0x00,0x00,0x5e,0x00,0x00,0x00,0x5d,0x00,0x00,0x00, 0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x5f,0x00,0x00,0x00, -0x5e,0x00,0x00,0x00,0x41,0x00,0x07,0x00,0x5c,0x00,0x00,0x00, -0x62,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x52,0x00,0x00,0x00,0x63,0x00,0x00,0x00,0x62,0x00,0x00,0x00, -0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x64,0x00,0x00,0x00, -0x63,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x68,0x00,0x00,0x00, -0x69,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x12,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x6a,0x00,0x00,0x00, -0x69,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x6b,0x00,0x00,0x00,0x6a,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x71,0x00,0x00,0x00,0x6b,0x00,0x00,0x00, -0x70,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, -0x72,0x00,0x00,0x00,0x71,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0x6b,0x00,0x00,0x00, -0x74,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, -0x76,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0x50,0x00,0x05,0x00, 
-0x6c,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x72,0x00,0x00,0x00, -0x76,0x00,0x00,0x00,0x8e,0x00,0x05,0x00,0x6c,0x00,0x00,0x00, -0x7a,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x5f,0x00,0x00,0x00, -0x50,0x00,0x05,0x00,0x6c,0x00,0x00,0x00,0x7c,0x00,0x00,0x00, -0x64,0x00,0x00,0x00,0x64,0x00,0x00,0x00,0x81,0x00,0x05,0x00, -0x6c,0x00,0x00,0x00,0x7d,0x00,0x00,0x00,0x7a,0x00,0x00,0x00, -0x7c,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x84,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0x51,0x00,0x05,0x00,0x1c,0x00,0x00,0x00,0x87,0x00,0x00,0x00, -0x7d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x88,0x00,0x00,0x00,0x89,0x00,0x00,0x00,0x81,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x84,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0x89,0x00,0x00,0x00,0x87,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x8d,0x00,0x00,0x00,0x84,0x00,0x00,0x00, -0x54,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x1c,0x00,0x00,0x00, -0x8f,0x00,0x00,0x00,0x7d,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x88,0x00,0x00,0x00,0x90,0x00,0x00,0x00, -0x81,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x8d,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0x90,0x00,0x00,0x00,0x8f,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0x93,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x93,0x00,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, - +0x5e,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x63,0x00,0x00,0x00, +0x64,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x45,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, +0x3d,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x65,0x00,0x00,0x00, +0x64,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x66,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x6c,0x00,0x00,0x00,0x66,0x00,0x00,0x00, +0x6b,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, +0x6d,0x00,0x00,0x00,0x6c,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x70,0x00,0x00,0x00,0x66,0x00,0x00,0x00, +0x6f,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, 
+0x71,0x00,0x00,0x00,0x70,0x00,0x00,0x00,0x50,0x00,0x05,0x00, +0x67,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0x6d,0x00,0x00,0x00, +0x71,0x00,0x00,0x00,0x83,0x00,0x05,0x00,0x67,0x00,0x00,0x00, +0x76,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0x96,0x00,0x00,0x00, +0x8e,0x00,0x05,0x00,0x67,0x00,0x00,0x00,0x78,0x00,0x00,0x00, +0x76,0x00,0x00,0x00,0x5f,0x00,0x00,0x00,0x80,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x7f,0x00,0x00,0x00,0x4f,0x00,0x00,0x00, +0x4a,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x1c,0x00,0x00,0x00, +0x82,0x00,0x00,0x00,0x78,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x41,0x00,0x06,0x00,0x83,0x00,0x00,0x00,0x84,0x00,0x00,0x00, +0x7c,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x7f,0x00,0x00,0x00, +0x3e,0x00,0x03,0x00,0x84,0x00,0x00,0x00,0x82,0x00,0x00,0x00, +0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x88,0x00,0x00,0x00, +0x7f,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x51,0x00,0x05,0x00, +0x1c,0x00,0x00,0x00,0x8a,0x00,0x00,0x00,0x78,0x00,0x00,0x00, +0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x83,0x00,0x00,0x00, +0x8b,0x00,0x00,0x00,0x7c,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x88,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0x8b,0x00,0x00,0x00, +0x8a,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x8e,0x00,0x00,0x00, +0xf8,0x00,0x02,0x00,0x8e,0x00,0x00,0x00,0xfd,0x00,0x01,0x00, +0x38,0x00,0x01,0x00, }; -const uint64_t get_rows_q4_1_f32_fp32_len = 2424; +const uint64_t get_rows_q4_0_f32_len = 2356; -unsigned char get_rows_q4_1_fp32_data[] = { +unsigned char get_rows_q4_1_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, 0x96,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, @@ -17093,503 +9098,11 @@ unsigned char get_rows_q4_1_fp32_data[] = { 0xf8,0x00,0x02,0x00,0x94,0x00,0x00,0x00,0xfd,0x00,0x01,0x00, 0x38,0x00,0x01,0x00, }; -const uint64_t get_rows_q4_1_fp32_len = 2440; +const uint64_t get_rows_q4_1_len = 2440; -unsigned char get_rows_q5_0_data[] = { +unsigned char get_rows_q4_1_f32_data[] = { 
0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0xc2,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, -0x11,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x51,0x11,0x00,0x00,0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00, -0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, -0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, -0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x0f,0x00,0x0a,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x5c,0x00,0x00,0x00, -0xa6,0x00,0x00,0x00,0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x1d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x1d,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x1d,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x2a,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x2b,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x2d,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x2d,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x54,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, 
-0x57,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x58,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x58,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x59,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x5a,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x5a,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x5a,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x5c,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x5c,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0xa3,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0xa4,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0xa4,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0xa4,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0xa6,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0xa6,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0xb6,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00, -0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x0a,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x0a,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, 
-0x0d,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x12,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x16,0x00,0x03,0x00,0x1c,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x1e,0x00,0x06,0x00,0x1d,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x1e,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x1d,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x1e,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x14,0x00,0x02,0x00,0x24,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x2a,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x2b,0x00,0x00,0x00,0x2a,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x2c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2b,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x2c,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x30,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x49,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x16,0x00,0x03,0x00,0x50,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x1c,0x00,0x04,0x00,0x54,0x00,0x00,0x00, -0x53,0x00,0x00,0x00,0x49,0x00,0x00,0x00,0x15,0x00,0x04,0x00, -0x55,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x56,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x1c,0x00,0x04,0x00,0x57,0x00,0x00,0x00, -0x55,0x00,0x00,0x00,0x56,0x00,0x00,0x00,0x1e,0x00,0x05,0x00, 
-0x58,0x00,0x00,0x00,0x50,0x00,0x00,0x00,0x54,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x59,0x00,0x00,0x00, -0x58,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x5a,0x00,0x00,0x00, -0x59,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x5b,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x5b,0x00,0x00,0x00,0x5c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x5e,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x50,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x63,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x67,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x74,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x7a,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x84,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x55,0x00,0x00,0x00, -0x17,0x00,0x04,0x00,0x87,0x00,0x00,0x00,0x50,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x8d,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x50,0x00,0x00,0x00,0x9e,0x00,0x00,0x00,0x00,0x4c,0x00,0x00, -0x1d,0x00,0x03,0x00,0xa3,0x00,0x00,0x00,0x50,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0xa4,0x00,0x00,0x00,0xa3,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0xa5,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0xa4,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0xa5,0x00,0x00,0x00, -0xa6,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xb5,0x00,0x00,0x00,0x00,0x02,0x00,0x00, -0x2c,0x00,0x06,0x00,0x09,0x00,0x00,0x00,0xb6,0x00,0x00,0x00, -0xb5,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x2c,0x00,0x05,0x00,0x87,0x00,0x00,0x00,0xc1,0x00,0x00,0x00, -0x9e,0x00,0x00,0x00,0x9e,0x00,0x00,0x00,0x36,0x00,0x05,0x00, -0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0xb7,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00,0xb8,0x00,0x00,0x00, 
-0xf8,0x00,0x02,0x00,0xb8,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x0d,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x10,0x00,0x00,0x00,0x13,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x12,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x13,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x0d,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x1a,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x21,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0xae,0x00,0x05,0x00, -0x24,0x00,0x00,0x00,0x25,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x27,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x25,0x00,0x00,0x00, -0x26,0x00,0x00,0x00,0x27,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x26,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0xb7,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x30,0x00,0x00,0x00,0x31,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x32,0x00,0x00,0x00,0x31,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x33,0x00,0x00,0x00, -0x32,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x38,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x38,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, 
-0x23,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x89,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x49,0x00,0x00,0x00, -0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4e,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x82,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x41,0x00,0x07,0x00,0x5e,0x00,0x00,0x00, -0x5f,0x00,0x00,0x00,0x5c,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x50,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x5f,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x63,0x00,0x00,0x00,0x64,0x00,0x00,0x00, -0x5c,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x53,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x64,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x66,0x00,0x00,0x00, -0x65,0x00,0x00,0x00,0xc4,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x68,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0x67,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x63,0x00,0x00,0x00,0x6a,0x00,0x00,0x00, -0x5c,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x53,0x00,0x00,0x00,0x6b,0x00,0x00,0x00,0x6a,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x6c,0x00,0x00,0x00, -0x6b,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x6d,0x00,0x00,0x00,0x68,0x00,0x00,0x00,0x6c,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x73,0x00,0x00,0x00, -0x6d,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0xc4,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0x73,0x00,0x00,0x00, -0x74,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, 
-0x76,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0x56,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x77,0x00,0x00,0x00, -0x76,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x7b,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0x7a,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x7c,0x00,0x00,0x00, -0x6d,0x00,0x00,0x00,0x7b,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x7d,0x00,0x00,0x00,0x7c,0x00,0x00,0x00, -0x56,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x7e,0x00,0x00,0x00,0x7d,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x84,0x00,0x00,0x00,0x85,0x00,0x00,0x00,0x5c,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00,0x12,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x55,0x00,0x00,0x00, -0x86,0x00,0x00,0x00,0x85,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x8b,0x00,0x00,0x00,0x86,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x8c,0x00,0x00,0x00, -0x8b,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x10,0x00,0x00,0x00, -0x8e,0x00,0x00,0x00,0x8c,0x00,0x00,0x00,0x8d,0x00,0x00,0x00, -0xc5,0x00,0x05,0x00,0x10,0x00,0x00,0x00,0x92,0x00,0x00,0x00, -0x8e,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x50,0x00,0x00,0x00,0x93,0x00,0x00,0x00,0x92,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x55,0x00,0x00,0x00,0x95,0x00,0x00,0x00, -0x86,0x00,0x00,0x00,0x74,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x96,0x00,0x00,0x00,0x95,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x97,0x00,0x00,0x00, -0x96,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x10,0x00,0x00,0x00, -0x9a,0x00,0x00,0x00,0x97,0x00,0x00,0x00,0x7e,0x00,0x00,0x00, -0x6f,0x00,0x04,0x00,0x50,0x00,0x00,0x00,0x9b,0x00,0x00,0x00, -0x9a,0x00,0x00,0x00,0x50,0x00,0x05,0x00,0x87,0x00,0x00,0x00, -0x9c,0x00,0x00,0x00,0x93,0x00,0x00,0x00,0x9b,0x00,0x00,0x00, -0x83,0x00,0x05,0x00,0x87,0x00,0x00,0x00,0xa0,0x00,0x00,0x00, -0x9c,0x00,0x00,0x00,0xc1,0x00,0x00,0x00,0x8e,0x00,0x05,0x00, -0x87,0x00,0x00,0x00,0xa2,0x00,0x00,0x00,0xa0,0x00,0x00,0x00, 
-0x60,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xa9,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0x51,0x00,0x05,0x00,0x50,0x00,0x00,0x00,0xac,0x00,0x00,0x00, -0xa2,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x5e,0x00,0x00,0x00,0xad,0x00,0x00,0x00,0xa6,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xa9,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0xad,0x00,0x00,0x00,0xac,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xb1,0x00,0x00,0x00,0xa9,0x00,0x00,0x00, -0x56,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x50,0x00,0x00,0x00, -0xb3,0x00,0x00,0x00,0xa2,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x5e,0x00,0x00,0x00,0xb4,0x00,0x00,0x00, -0xa6,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xb1,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0xb4,0x00,0x00,0x00,0xb3,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0xb7,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0xb7,0x00,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, - -}; -const uint64_t get_rows_q5_0_len = 2868; - -unsigned char get_rows_q5_0_f32_data[] = { -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0xc5,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, -0x11,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x51,0x11,0x00,0x00,0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00, -0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, -0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, -0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x0f,0x00,0x0a,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x5c,0x00,0x00,0x00, -0xa6,0x00,0x00,0x00,0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00, 
-0x1d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x1d,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x1d,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x2a,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x2b,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x2d,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x2d,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x54,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x57,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x58,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x58,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x59,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x5a,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x5a,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x5a,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x5c,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x5c,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0xa3,0x00,0x00,0x00,0x06,0x00,0x00,0x00, 
-0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0xa4,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0xa4,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0xa4,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0xa6,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0xa6,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0xb9,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00, -0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x0a,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x0a,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x0d,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x12,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x16,0x00,0x03,0x00,0x1c,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x1e,0x00,0x06,0x00,0x1d,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x1e,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x1d,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x1e,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x14,0x00,0x02,0x00,0x24,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x2a,0x00,0x00,0x00,0x10,0x00,0x00,0x00, 
-0x1e,0x00,0x03,0x00,0x2b,0x00,0x00,0x00,0x2a,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x2c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2b,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x2c,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x30,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x49,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x16,0x00,0x03,0x00,0x50,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x1c,0x00,0x04,0x00,0x54,0x00,0x00,0x00, -0x53,0x00,0x00,0x00,0x49,0x00,0x00,0x00,0x15,0x00,0x04,0x00, -0x55,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x56,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x1c,0x00,0x04,0x00,0x57,0x00,0x00,0x00, -0x55,0x00,0x00,0x00,0x56,0x00,0x00,0x00,0x1e,0x00,0x05,0x00, -0x58,0x00,0x00,0x00,0x50,0x00,0x00,0x00,0x54,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x59,0x00,0x00,0x00, -0x58,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x5a,0x00,0x00,0x00, -0x59,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x5b,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x5b,0x00,0x00,0x00,0x5c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x5e,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x50,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x63,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x67,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x74,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x7a,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x84,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x55,0x00,0x00,0x00, -0x17,0x00,0x04,0x00,0x87,0x00,0x00,0x00,0x50,0x00,0x00,0x00, 
-0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x8d,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x50,0x00,0x00,0x00,0x9e,0x00,0x00,0x00,0x00,0x4c,0x00,0x00, -0x1d,0x00,0x03,0x00,0xa3,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0xa4,0x00,0x00,0x00,0xa3,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0xa5,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0xa4,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0xa5,0x00,0x00,0x00, -0xa6,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0xae,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xb8,0x00,0x00,0x00, -0x00,0x02,0x00,0x00,0x2c,0x00,0x06,0x00,0x09,0x00,0x00,0x00, -0xb9,0x00,0x00,0x00,0xb8,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x2c,0x00,0x05,0x00,0x87,0x00,0x00,0x00, -0xc4,0x00,0x00,0x00,0x9e,0x00,0x00,0x00,0x9e,0x00,0x00,0x00, -0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0xba,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00, -0xbb,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0xbb,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x0d,0x00,0x00,0x00,0x0e,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x0e,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x10,0x00,0x00,0x00, -0x13,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x12,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x13,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x0d,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x19,0x00,0x00,0x00, 
-0x41,0x00,0x05,0x00,0x21,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0xae,0x00,0x05,0x00,0x24,0x00,0x00,0x00,0x25,0x00,0x00,0x00, -0x14,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, -0x27,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00, -0x25,0x00,0x00,0x00,0x26,0x00,0x00,0x00,0x27,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x26,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0xba,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x27,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x30,0x00,0x00,0x00,0x31,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x32,0x00,0x00,0x00, -0x31,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x33,0x00,0x00,0x00,0x32,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x33,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3f,0x00,0x00,0x00, -0x1a,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x3f,0x00,0x00,0x00, -0x14,0x00,0x00,0x00,0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x44,0x00,0x00,0x00, -0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x48,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x86,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0x48,0x00,0x00,0x00, -0x49,0x00,0x00,0x00,0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x44,0x00,0x00,0x00, -0x82,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4f,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x4e,0x00,0x00,0x00,0x41,0x00,0x07,0x00, -0x5e,0x00,0x00,0x00,0x5f,0x00,0x00,0x00,0x5c,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x50,0x00,0x00,0x00,0x60,0x00,0x00,0x00, 
-0x5f,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x63,0x00,0x00,0x00, -0x64,0x00,0x00,0x00,0x5c,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x65,0x00,0x00,0x00, -0x64,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x66,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0xc4,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x68,0x00,0x00,0x00,0x66,0x00,0x00,0x00, -0x67,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x63,0x00,0x00,0x00, -0x6a,0x00,0x00,0x00,0x5c,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x6b,0x00,0x00,0x00, -0x6a,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x6c,0x00,0x00,0x00,0x6b,0x00,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x6d,0x00,0x00,0x00,0x68,0x00,0x00,0x00, -0x6c,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x73,0x00,0x00,0x00,0x6d,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0xc4,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x75,0x00,0x00,0x00, -0x73,0x00,0x00,0x00,0x74,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x76,0x00,0x00,0x00,0x75,0x00,0x00,0x00, -0x56,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x77,0x00,0x00,0x00,0x76,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x7b,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0x7a,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x7c,0x00,0x00,0x00,0x6d,0x00,0x00,0x00,0x7b,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x7d,0x00,0x00,0x00, -0x7c,0x00,0x00,0x00,0x56,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,0x7d,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x84,0x00,0x00,0x00,0x85,0x00,0x00,0x00, -0x5c,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x12,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x55,0x00,0x00,0x00,0x86,0x00,0x00,0x00,0x85,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x8b,0x00,0x00,0x00, 
-0x86,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x8c,0x00,0x00,0x00,0x8b,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x10,0x00,0x00,0x00,0x8e,0x00,0x00,0x00,0x8c,0x00,0x00,0x00, -0x8d,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x10,0x00,0x00,0x00, -0x92,0x00,0x00,0x00,0x8e,0x00,0x00,0x00,0x77,0x00,0x00,0x00, -0x6f,0x00,0x04,0x00,0x50,0x00,0x00,0x00,0x93,0x00,0x00,0x00, -0x92,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x55,0x00,0x00,0x00, -0x95,0x00,0x00,0x00,0x86,0x00,0x00,0x00,0x74,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x96,0x00,0x00,0x00, -0x95,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x97,0x00,0x00,0x00,0x96,0x00,0x00,0x00,0xc5,0x00,0x05,0x00, -0x10,0x00,0x00,0x00,0x9a,0x00,0x00,0x00,0x97,0x00,0x00,0x00, -0x7e,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x50,0x00,0x00,0x00, -0x9b,0x00,0x00,0x00,0x9a,0x00,0x00,0x00,0x50,0x00,0x05,0x00, -0x87,0x00,0x00,0x00,0x9c,0x00,0x00,0x00,0x93,0x00,0x00,0x00, -0x9b,0x00,0x00,0x00,0x83,0x00,0x05,0x00,0x87,0x00,0x00,0x00, -0xa0,0x00,0x00,0x00,0x9c,0x00,0x00,0x00,0xc4,0x00,0x00,0x00, -0x8e,0x00,0x05,0x00,0x87,0x00,0x00,0x00,0xa2,0x00,0x00,0x00, -0xa0,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xa9,0x00,0x00,0x00,0x4f,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x50,0x00,0x00,0x00, -0xac,0x00,0x00,0x00,0xa2,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0xad,0x00,0x00,0x00, -0xac,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0xae,0x00,0x00,0x00, -0xaf,0x00,0x00,0x00,0xa6,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xa9,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0xaf,0x00,0x00,0x00, -0xad,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xb3,0x00,0x00,0x00,0xa9,0x00,0x00,0x00,0x56,0x00,0x00,0x00, -0x51,0x00,0x05,0x00,0x50,0x00,0x00,0x00,0xb5,0x00,0x00,0x00, -0xa2,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x73,0x00,0x04,0x00, -0x1c,0x00,0x00,0x00,0xb6,0x00,0x00,0x00,0xb5,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0xae,0x00,0x00,0x00,0xb7,0x00,0x00,0x00, 
-0xa6,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xb3,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0xb7,0x00,0x00,0x00,0xb6,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0xba,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0xba,0x00,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, - -}; -const uint64_t get_rows_q5_0_f32_len = 2916; - -unsigned char get_rows_q5_0_f32_fp32_data[] = { -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0xc2,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, +0x95,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, 0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00,0x0b,0x00,0x06,0x00, 0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c,0x2e,0x73,0x74,0x64, @@ -17597,7 +9110,7 @@ unsigned char get_rows_q5_0_f32_fp32_data[] = { 0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x0f,0x00,0x0a,0x00, 0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x6d,0x61,0x69,0x6e, 0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x5c,0x00,0x00,0x00,0xa5,0x00,0x00,0x00, +0x2d,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x81,0x00,0x00,0x00, 0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00,0x11,0x00,0x00,0x00, 0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00, 0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, @@ -17617,106 +9130,95 @@ unsigned char get_rows_q5_0_f32_fp32_data[] = { 0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x2d,0x00,0x00,0x00, 0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, 0x2d,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x54,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x57,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x58,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x58,0x00,0x00,0x00,0x02,0x00,0x00,0x00, 
-0x23,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x59,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x48,0x00,0x04,0x00,0x5a,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x5a,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x55,0x00,0x00,0x00,0x06,0x00,0x00,0x00, +0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x56,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x5a,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x5c,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x5c,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0xa2,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x48,0x00,0x04,0x00,0xa3,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0xa3,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0xa3,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0xa5,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0xa5,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0xb6,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00, -0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x17,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x0a,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x0a,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x0d,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x15,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x12,0x00,0x00,0x00, 
-0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x16,0x00,0x03,0x00, -0x1c,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x1e,0x00,0x06,0x00, -0x1d,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x1e,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x1d,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x1e,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x14,0x00,0x02,0x00,0x24,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, -0x2a,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0x2b,0x00,0x00,0x00,0x2a,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x2c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x2c,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x30,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x44,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x49,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x16,0x00,0x03,0x00, -0x52,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x15,0x00,0x04,0x00, -0x53,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x1c,0x00,0x04,0x00,0x54,0x00,0x00,0x00,0x53,0x00,0x00,0x00, -0x49,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x55,0x00,0x00,0x00, +0x48,0x00,0x05,0x00,0x56,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x48,0x00,0x05,0x00, +0x56,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x23,0x00,0x00,0x00, +0x04,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x57,0x00,0x00,0x00, +0x06,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x48,0x00,0x04,0x00, +0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, +0x48,0x00,0x05,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 
+0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, +0x58,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, +0x5a,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x5a,0x00,0x00,0x00,0x21,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x7e,0x00,0x00,0x00, +0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00, +0x7f,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00, +0x48,0x00,0x05,0x00,0x7f,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, +0x7f,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, +0x81,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x81,0x00,0x00,0x00,0x21,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x92,0x00,0x00,0x00, +0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00, +0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00, +0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x03,0x00,0x00,0x00, +0x20,0x00,0x04,0x00,0x0a,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x09,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x0a,0x00,0x00,0x00, +0x0b,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, +0x06,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x20,0x00,0x04,0x00,0x0d,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x06,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x10,0x00,0x00,0x00, +0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, +0x10,0x00,0x00,0x00,0x12,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x01,0x00,0x00,0x00,0x16,0x00,0x03,0x00,0x1c,0x00,0x00,0x00, +0x20,0x00,0x00,0x00,0x1e,0x00,0x06,0x00,0x1d,0x00,0x00,0x00, +0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, +0x1c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x1e,0x00,0x00,0x00, +0x09,0x00,0x00,0x00,0x1d,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, 
+0x1e,0x00,0x00,0x00,0x1f,0x00,0x00,0x00,0x09,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00, +0x01,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x21,0x00,0x00,0x00, +0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x14,0x00,0x02,0x00, +0x24,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x2a,0x00,0x00,0x00, +0x10,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x2b,0x00,0x00,0x00, +0x2a,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x2c,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x2b,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, +0x2c,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x30,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, +0x06,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x20,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x49,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x16,0x00,0x03,0x00,0x52,0x00,0x00,0x00, +0x10,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x53,0x00,0x00,0x00, 0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x56,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x1c,0x00,0x04,0x00,0x57,0x00,0x00,0x00,0x55,0x00,0x00,0x00, -0x56,0x00,0x00,0x00,0x1e,0x00,0x05,0x00,0x58,0x00,0x00,0x00, -0x52,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x57,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x59,0x00,0x00,0x00,0x58,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x5a,0x00,0x00,0x00,0x59,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x5b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x5a,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x5b,0x00,0x00,0x00, -0x5c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x5e,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x52,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x64,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x53,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x68,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x7b,0x00,0x00,0x00, 
-0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x84,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x55,0x00,0x00,0x00,0x17,0x00,0x04,0x00, -0x88,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x8c,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, -0x9d,0x00,0x00,0x00,0x00,0x00,0x80,0x41,0x1d,0x00,0x03,0x00, -0xa2,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0xa3,0x00,0x00,0x00,0xa2,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0xa4,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0xa3,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0xa4,0x00,0x00,0x00,0xa5,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0xac,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xb5,0x00,0x00,0x00,0x00,0x02,0x00,0x00, -0x2c,0x00,0x06,0x00,0x09,0x00,0x00,0x00,0xb6,0x00,0x00,0x00, -0xb5,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x2c,0x00,0x05,0x00,0x88,0x00,0x00,0x00,0xc1,0x00,0x00,0x00, -0x9d,0x00,0x00,0x00,0x9d,0x00,0x00,0x00,0x36,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x10,0x00,0x00,0x00, +0x1c,0x00,0x04,0x00,0x55,0x00,0x00,0x00,0x53,0x00,0x00,0x00, +0x54,0x00,0x00,0x00,0x1e,0x00,0x05,0x00,0x56,0x00,0x00,0x00, +0x52,0x00,0x00,0x00,0x52,0x00,0x00,0x00,0x55,0x00,0x00,0x00, +0x1d,0x00,0x03,0x00,0x57,0x00,0x00,0x00,0x56,0x00,0x00,0x00, +0x1e,0x00,0x03,0x00,0x58,0x00,0x00,0x00,0x57,0x00,0x00,0x00, +0x20,0x00,0x04,0x00,0x59,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x58,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x59,0x00,0x00,0x00, +0x5a,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x5c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x52,0x00,0x00,0x00, +0x20,0x00,0x04,0x00,0x68,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x53,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x6c,0x00,0x00,0x00, +0x1c,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, +0x06,0x00,0x00,0x00,0x70,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x74,0x00,0x00,0x00, 
+0x04,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x7e,0x00,0x00,0x00, +0x1c,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x7f,0x00,0x00,0x00, +0x7e,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x80,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x7f,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, +0x80,0x00,0x00,0x00,0x81,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x20,0x00,0x04,0x00,0x88,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x1c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x91,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x2c,0x00,0x06,0x00, +0x09,0x00,0x00,0x00,0x92,0x00,0x00,0x00,0x91,0x00,0x00,0x00, +0x16,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x36,0x00,0x05,0x00, 0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0xb7,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00,0xb8,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0xb8,0x00,0x00,0x00,0x41,0x00,0x05,0x00, +0xf7,0x00,0x03,0x00,0x93,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00,0x94,0x00,0x00,0x00, +0xf8,0x00,0x02,0x00,0x94,0x00,0x00,0x00,0x41,0x00,0x05,0x00, 0x0d,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, 0x0c,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, 0x0f,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, @@ -17737,7 +9239,7 @@ unsigned char get_rows_q5_0_f32_fp32_data[] = { 0x23,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x27,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x25,0x00,0x00,0x00, 0x26,0x00,0x00,0x00,0x27,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x26,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0xb7,0x00,0x00,0x00, +0x26,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x93,0x00,0x00,0x00, 0xf8,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x41,0x00,0x06,0x00, 0x30,0x00,0x00,0x00,0x31,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, 0x2e,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, @@ -17758,80 +9260,54 @@ unsigned char get_rows_q5_0_f32_fp32_data[] = { 0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4e,0x00,0x00,0x00, 
0x41,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x82,0x00,0x05,0x00, 0x06,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x41,0x00,0x07,0x00,0x5e,0x00,0x00,0x00, -0x5f,0x00,0x00,0x00,0x5c,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x4e,0x00,0x00,0x00,0x41,0x00,0x07,0x00,0x5c,0x00,0x00,0x00, +0x5d,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, 0x45,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x52,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x5f,0x00,0x00,0x00, -0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x61,0x00,0x00,0x00, -0x60,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x64,0x00,0x00,0x00, -0x65,0x00,0x00,0x00,0x5c,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x66,0x00,0x00,0x00, -0x65,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x67,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0xc4,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x69,0x00,0x00,0x00,0x67,0x00,0x00,0x00, -0x68,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x64,0x00,0x00,0x00, -0x6b,0x00,0x00,0x00,0x5c,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x6c,0x00,0x00,0x00, -0x6b,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x6d,0x00,0x00,0x00,0x6c,0x00,0x00,0x00,0xc5,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x6e,0x00,0x00,0x00,0x69,0x00,0x00,0x00, -0x6d,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x74,0x00,0x00,0x00,0x6e,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0xc4,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x76,0x00,0x00,0x00, -0x74,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x76,0x00,0x00,0x00, -0x56,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x78,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x7c,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0x7b,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x06,0x00,0x00,0x00, 
-0x7d,0x00,0x00,0x00,0x6e,0x00,0x00,0x00,0x7c,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x7e,0x00,0x00,0x00, -0x7d,0x00,0x00,0x00,0x56,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x7f,0x00,0x00,0x00,0x7e,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x84,0x00,0x00,0x00,0x85,0x00,0x00,0x00, -0x5c,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x12,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x55,0x00,0x00,0x00,0x86,0x00,0x00,0x00,0x85,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x87,0x00,0x00,0x00, -0x86,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x8d,0x00,0x00,0x00,0x87,0x00,0x00,0x00,0x8c,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x91,0x00,0x00,0x00, -0x78,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x92,0x00,0x00,0x00,0x8d,0x00,0x00,0x00,0x91,0x00,0x00,0x00, -0x70,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x93,0x00,0x00,0x00, -0x92,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x95,0x00,0x00,0x00,0x87,0x00,0x00,0x00,0x75,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x98,0x00,0x00,0x00, -0x7f,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x99,0x00,0x00,0x00,0x95,0x00,0x00,0x00,0x98,0x00,0x00,0x00, -0x70,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x9a,0x00,0x00,0x00, -0x99,0x00,0x00,0x00,0x50,0x00,0x05,0x00,0x88,0x00,0x00,0x00, -0x9b,0x00,0x00,0x00,0x93,0x00,0x00,0x00,0x9a,0x00,0x00,0x00, -0x83,0x00,0x05,0x00,0x88,0x00,0x00,0x00,0x9f,0x00,0x00,0x00, -0x9b,0x00,0x00,0x00,0xc1,0x00,0x00,0x00,0x8e,0x00,0x05,0x00, -0x88,0x00,0x00,0x00,0xa1,0x00,0x00,0x00,0x9f,0x00,0x00,0x00, -0x61,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xa8,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0x51,0x00,0x05,0x00,0x1c,0x00,0x00,0x00,0xab,0x00,0x00,0x00, -0xa1,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0xac,0x00,0x00,0x00,0xad,0x00,0x00,0x00,0xa5,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xa8,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, 
-0xad,0x00,0x00,0x00,0xab,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xb1,0x00,0x00,0x00,0xa8,0x00,0x00,0x00, -0x56,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x1c,0x00,0x00,0x00, -0xb3,0x00,0x00,0x00,0xa1,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0xac,0x00,0x00,0x00,0xb4,0x00,0x00,0x00, -0xa5,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xb1,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0xb4,0x00,0x00,0x00,0xb3,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0xb7,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0xb7,0x00,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, +0x52,0x00,0x00,0x00,0x5e,0x00,0x00,0x00,0x5d,0x00,0x00,0x00, +0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x5f,0x00,0x00,0x00, +0x5e,0x00,0x00,0x00,0x41,0x00,0x07,0x00,0x5c,0x00,0x00,0x00, +0x62,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x45,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, +0x52,0x00,0x00,0x00,0x63,0x00,0x00,0x00,0x62,0x00,0x00,0x00, +0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x64,0x00,0x00,0x00, +0x63,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x68,0x00,0x00,0x00, +0x69,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x45,0x00,0x00,0x00,0x12,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, +0x3d,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x6a,0x00,0x00,0x00, +0x69,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x6b,0x00,0x00,0x00,0x6a,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x71,0x00,0x00,0x00,0x6b,0x00,0x00,0x00, +0x70,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, +0x72,0x00,0x00,0x00,0x71,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0x6b,0x00,0x00,0x00, +0x74,0x00,0x00,0x00,0x70,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, +0x76,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0x50,0x00,0x05,0x00, +0x6c,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x72,0x00,0x00,0x00, +0x76,0x00,0x00,0x00,0x8e,0x00,0x05,0x00,0x6c,0x00,0x00,0x00, +0x7a,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x5f,0x00,0x00,0x00, +0x50,0x00,0x05,0x00,0x6c,0x00,0x00,0x00,0x7c,0x00,0x00,0x00, 
+0x64,0x00,0x00,0x00,0x64,0x00,0x00,0x00,0x81,0x00,0x05,0x00, +0x6c,0x00,0x00,0x00,0x7d,0x00,0x00,0x00,0x7a,0x00,0x00,0x00, +0x7c,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x84,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, +0x51,0x00,0x05,0x00,0x1c,0x00,0x00,0x00,0x87,0x00,0x00,0x00, +0x7d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, +0x88,0x00,0x00,0x00,0x89,0x00,0x00,0x00,0x81,0x00,0x00,0x00, +0x2e,0x00,0x00,0x00,0x84,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, +0x89,0x00,0x00,0x00,0x87,0x00,0x00,0x00,0x80,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x8d,0x00,0x00,0x00,0x84,0x00,0x00,0x00, +0x54,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x1c,0x00,0x00,0x00, +0x8f,0x00,0x00,0x00,0x7d,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x41,0x00,0x06,0x00,0x88,0x00,0x00,0x00,0x90,0x00,0x00,0x00, +0x81,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x8d,0x00,0x00,0x00, +0x3e,0x00,0x03,0x00,0x90,0x00,0x00,0x00,0x8f,0x00,0x00,0x00, +0xf9,0x00,0x02,0x00,0x93,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, +0x93,0x00,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, }; -const uint64_t get_rows_q5_0_f32_fp32_len = 2868; +const uint64_t get_rows_q4_1_f32_len = 2424; -unsigned char get_rows_q5_0_fp32_data[] = { +unsigned char get_rows_q5_0_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, 0xc3,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, @@ -18074,485 +9550,11 @@ unsigned char get_rows_q5_0_fp32_data[] = { 0xf8,0x00,0x02,0x00,0xb8,0x00,0x00,0x00,0xfd,0x00,0x01,0x00, 0x38,0x00,0x01,0x00, }; -const uint64_t get_rows_q5_0_fp32_len = 2884; +const uint64_t get_rows_q5_0_len = 2884; -unsigned char get_rows_q5_1_data[] = { +unsigned char get_rows_q5_0_f32_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0xb4,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, -0x11,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 
-0x51,0x11,0x00,0x00,0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00, -0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, -0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, -0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x0f,0x00,0x0a,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x5a,0x00,0x00,0x00, -0xa1,0x00,0x00,0x00,0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x1d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x1d,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x1d,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x2a,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x2b,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x2d,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x2d,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x55,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x56,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x56,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x56,0x00,0x00,0x00,0x02,0x00,0x00,0x00, 
-0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x56,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x57,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x58,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x5a,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x5a,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x9e,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x9f,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x9f,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x9f,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0xa1,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0xa1,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0xb1,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00, -0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x0a,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x0a,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x0d,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x12,0x00,0x00,0x00,0x02,0x00,0x00,0x00, 
-0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x16,0x00,0x03,0x00,0x1c,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x1e,0x00,0x06,0x00,0x1d,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x1e,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x1d,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x1e,0x00,0x00,0x00,0x1f,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x21,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x14,0x00,0x02,0x00, -0x24,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x2a,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x2b,0x00,0x00,0x00, -0x2a,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x2c,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x2b,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x2c,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x30,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x49,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x16,0x00,0x03,0x00,0x50,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x53,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x1c,0x00,0x04,0x00,0x55,0x00,0x00,0x00,0x53,0x00,0x00,0x00, -0x54,0x00,0x00,0x00,0x1e,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0x50,0x00,0x00,0x00,0x50,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x55,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x57,0x00,0x00,0x00, -0x56,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x58,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x59,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x59,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, 
-0x20,0x00,0x04,0x00,0x5c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x50,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x67,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x6c,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x74,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x7d,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x7f,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x53,0x00,0x00,0x00, -0x17,0x00,0x04,0x00,0x82,0x00,0x00,0x00,0x50,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x88,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, -0x9e,0x00,0x00,0x00,0x50,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0x9f,0x00,0x00,0x00,0x9e,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0xa0,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x9f,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0xa0,0x00,0x00,0x00,0xa1,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xb0,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x2c,0x00,0x06,0x00, -0x09,0x00,0x00,0x00,0xb1,0x00,0x00,0x00,0xb0,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x36,0x00,0x05,0x00, -0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0xb2,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00,0xb3,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0xb3,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x0d,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x10,0x00,0x00,0x00,0x13,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x12,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x13,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x0d,0x00,0x00,0x00,0x17,0x00,0x00,0x00, 
-0x0b,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x1a,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x21,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0xae,0x00,0x05,0x00, -0x24,0x00,0x00,0x00,0x25,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x27,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x25,0x00,0x00,0x00, -0x26,0x00,0x00,0x00,0x27,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x26,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0xb2,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x30,0x00,0x00,0x00,0x31,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x32,0x00,0x00,0x00,0x31,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x33,0x00,0x00,0x00, -0x32,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x38,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x38,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x89,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x49,0x00,0x00,0x00, -0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4e,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x82,0x00,0x05,0x00, 
-0x06,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x41,0x00,0x07,0x00,0x5c,0x00,0x00,0x00, -0x5d,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x50,0x00,0x00,0x00,0x5e,0x00,0x00,0x00,0x5d,0x00,0x00,0x00, -0x41,0x00,0x07,0x00,0x5c,0x00,0x00,0x00,0x61,0x00,0x00,0x00, -0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x50,0x00,0x00,0x00, -0x62,0x00,0x00,0x00,0x61,0x00,0x00,0x00,0x41,0x00,0x07,0x00, -0x67,0x00,0x00,0x00,0x68,0x00,0x00,0x00,0x5a,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00,0x12,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x69,0x00,0x00,0x00, -0x68,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x6b,0x00,0x00,0x00,0x69,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0xc4,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x6d,0x00,0x00,0x00, -0x6b,0x00,0x00,0x00,0x6c,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x6e,0x00,0x00,0x00,0x6d,0x00,0x00,0x00, -0x54,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x6f,0x00,0x00,0x00,0x6e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0x68,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x75,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x74,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x76,0x00,0x00,0x00,0x72,0x00,0x00,0x00, -0x75,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x77,0x00,0x00,0x00,0x76,0x00,0x00,0x00,0x54,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x78,0x00,0x00,0x00, -0x77,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x7f,0x00,0x00,0x00, -0x80,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x7d,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x81,0x00,0x00,0x00, -0x80,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x86,0x00,0x00,0x00,0x81,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, 
-0x10,0x00,0x00,0x00,0x87,0x00,0x00,0x00,0x86,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x10,0x00,0x00,0x00,0x89,0x00,0x00,0x00, -0x87,0x00,0x00,0x00,0x88,0x00,0x00,0x00,0xc5,0x00,0x05,0x00, -0x10,0x00,0x00,0x00,0x8d,0x00,0x00,0x00,0x89,0x00,0x00,0x00, -0x6f,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x50,0x00,0x00,0x00, -0x8e,0x00,0x00,0x00,0x8d,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x53,0x00,0x00,0x00,0x90,0x00,0x00,0x00,0x81,0x00,0x00,0x00, -0x6c,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x91,0x00,0x00,0x00,0x90,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x92,0x00,0x00,0x00,0x91,0x00,0x00,0x00, -0xc5,0x00,0x05,0x00,0x10,0x00,0x00,0x00,0x95,0x00,0x00,0x00, -0x92,0x00,0x00,0x00,0x78,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x50,0x00,0x00,0x00,0x96,0x00,0x00,0x00,0x95,0x00,0x00,0x00, -0x50,0x00,0x05,0x00,0x82,0x00,0x00,0x00,0x97,0x00,0x00,0x00, -0x8e,0x00,0x00,0x00,0x96,0x00,0x00,0x00,0x8e,0x00,0x05,0x00, -0x82,0x00,0x00,0x00,0x9a,0x00,0x00,0x00,0x97,0x00,0x00,0x00, -0x5e,0x00,0x00,0x00,0x50,0x00,0x05,0x00,0x82,0x00,0x00,0x00, -0x9c,0x00,0x00,0x00,0x62,0x00,0x00,0x00,0x62,0x00,0x00,0x00, -0x81,0x00,0x05,0x00,0x82,0x00,0x00,0x00,0x9d,0x00,0x00,0x00, -0x9a,0x00,0x00,0x00,0x9c,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xa4,0x00,0x00,0x00,0x4f,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x50,0x00,0x00,0x00, -0xa7,0x00,0x00,0x00,0x9d,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x5c,0x00,0x00,0x00,0xa8,0x00,0x00,0x00, -0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xa4,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0xa8,0x00,0x00,0x00,0xa7,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xac,0x00,0x00,0x00, -0xa4,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x51,0x00,0x05,0x00, -0x50,0x00,0x00,0x00,0xae,0x00,0x00,0x00,0x9d,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x5c,0x00,0x00,0x00, -0xaf,0x00,0x00,0x00,0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xac,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0xaf,0x00,0x00,0x00, 
-0xae,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0xb2,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0xb2,0x00,0x00,0x00,0xfd,0x00,0x01,0x00, -0x38,0x00,0x01,0x00, -}; -const uint64_t get_rows_q5_1_len = 2764; - -unsigned char get_rows_q5_1_f32_data[] = { -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0xb7,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, -0x11,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x51,0x11,0x00,0x00,0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00, -0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, -0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, -0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x0f,0x00,0x0a,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x5a,0x00,0x00,0x00, -0xa1,0x00,0x00,0x00,0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x1d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x1d,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x1d,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x2a,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x2b,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, 
-0x2d,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x2d,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x55,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x56,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x56,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x56,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x56,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x57,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x58,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x5a,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x5a,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x9e,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x9f,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x9f,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x9f,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0xa1,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0xa1,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0xb4,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00, -0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x03,0x00,0x00,0x00, 
-0x20,0x00,0x04,0x00,0x0a,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x0a,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x0d,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x12,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x16,0x00,0x03,0x00,0x1c,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x1e,0x00,0x06,0x00,0x1d,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x1e,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x1d,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x1e,0x00,0x00,0x00,0x1f,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x21,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x14,0x00,0x02,0x00, -0x24,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x2a,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x2b,0x00,0x00,0x00, -0x2a,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x2c,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x2b,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x2c,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x30,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x49,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x16,0x00,0x03,0x00,0x50,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x53,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x10,0x00,0x00,0x00, 
-0x1c,0x00,0x04,0x00,0x55,0x00,0x00,0x00,0x53,0x00,0x00,0x00, -0x54,0x00,0x00,0x00,0x1e,0x00,0x06,0x00,0x56,0x00,0x00,0x00, -0x50,0x00,0x00,0x00,0x50,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x55,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x57,0x00,0x00,0x00, -0x56,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x58,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x59,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x59,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x5c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x50,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x67,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x6c,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x74,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x7d,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x7f,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x53,0x00,0x00,0x00, -0x17,0x00,0x04,0x00,0x82,0x00,0x00,0x00,0x50,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x88,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, -0x9e,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0x9f,0x00,0x00,0x00,0x9e,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0xa0,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x9f,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0xa0,0x00,0x00,0x00,0xa1,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0xa9,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xb3,0x00,0x00,0x00,0x00,0x02,0x00,0x00, -0x2c,0x00,0x06,0x00,0x09,0x00,0x00,0x00,0xb4,0x00,0x00,0x00, -0xb3,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0xb5,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00, 
-0xb6,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0xb6,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x0d,0x00,0x00,0x00,0x0e,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x0e,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x10,0x00,0x00,0x00, -0x13,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x12,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x13,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x0d,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x21,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0xae,0x00,0x05,0x00,0x24,0x00,0x00,0x00,0x25,0x00,0x00,0x00, -0x14,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, -0x27,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00, -0x25,0x00,0x00,0x00,0x26,0x00,0x00,0x00,0x27,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x26,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0xb5,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x27,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x30,0x00,0x00,0x00,0x31,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x32,0x00,0x00,0x00, -0x31,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x33,0x00,0x00,0x00,0x32,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x33,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3f,0x00,0x00,0x00, 
-0x1a,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x3f,0x00,0x00,0x00, -0x14,0x00,0x00,0x00,0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x44,0x00,0x00,0x00, -0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x48,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x86,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0x48,0x00,0x00,0x00, -0x49,0x00,0x00,0x00,0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x44,0x00,0x00,0x00, -0x82,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4f,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x4e,0x00,0x00,0x00,0x41,0x00,0x07,0x00, -0x5c,0x00,0x00,0x00,0x5d,0x00,0x00,0x00,0x5a,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x50,0x00,0x00,0x00,0x5e,0x00,0x00,0x00, -0x5d,0x00,0x00,0x00,0x41,0x00,0x07,0x00,0x5c,0x00,0x00,0x00, -0x61,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x50,0x00,0x00,0x00,0x62,0x00,0x00,0x00,0x61,0x00,0x00,0x00, -0x41,0x00,0x07,0x00,0x67,0x00,0x00,0x00,0x68,0x00,0x00,0x00, -0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x12,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x69,0x00,0x00,0x00,0x68,0x00,0x00,0x00,0xc2,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x6b,0x00,0x00,0x00,0x69,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0xc4,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x6d,0x00,0x00,0x00,0x6b,0x00,0x00,0x00,0x6c,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x6e,0x00,0x00,0x00, -0x6d,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x6f,0x00,0x00,0x00,0x6e,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x72,0x00,0x00,0x00, -0x68,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x75,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0x74,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x76,0x00,0x00,0x00, 
-0x72,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x76,0x00,0x00,0x00, -0x54,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x78,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x7f,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x5a,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00,0x7d,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x53,0x00,0x00,0x00, -0x81,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x86,0x00,0x00,0x00,0x81,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x87,0x00,0x00,0x00, -0x86,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x10,0x00,0x00,0x00, -0x89,0x00,0x00,0x00,0x87,0x00,0x00,0x00,0x88,0x00,0x00,0x00, -0xc5,0x00,0x05,0x00,0x10,0x00,0x00,0x00,0x8d,0x00,0x00,0x00, -0x89,0x00,0x00,0x00,0x6f,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x50,0x00,0x00,0x00,0x8e,0x00,0x00,0x00,0x8d,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x53,0x00,0x00,0x00,0x90,0x00,0x00,0x00, -0x81,0x00,0x00,0x00,0x6c,0x00,0x00,0x00,0x71,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x91,0x00,0x00,0x00,0x90,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x92,0x00,0x00,0x00, -0x91,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x10,0x00,0x00,0x00, -0x95,0x00,0x00,0x00,0x92,0x00,0x00,0x00,0x78,0x00,0x00,0x00, -0x6f,0x00,0x04,0x00,0x50,0x00,0x00,0x00,0x96,0x00,0x00,0x00, -0x95,0x00,0x00,0x00,0x50,0x00,0x05,0x00,0x82,0x00,0x00,0x00, -0x97,0x00,0x00,0x00,0x8e,0x00,0x00,0x00,0x96,0x00,0x00,0x00, -0x8e,0x00,0x05,0x00,0x82,0x00,0x00,0x00,0x9a,0x00,0x00,0x00, -0x97,0x00,0x00,0x00,0x5e,0x00,0x00,0x00,0x50,0x00,0x05,0x00, -0x82,0x00,0x00,0x00,0x9c,0x00,0x00,0x00,0x62,0x00,0x00,0x00, -0x62,0x00,0x00,0x00,0x81,0x00,0x05,0x00,0x82,0x00,0x00,0x00, -0x9d,0x00,0x00,0x00,0x9a,0x00,0x00,0x00,0x9c,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xa4,0x00,0x00,0x00, -0x4f,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0x51,0x00,0x05,0x00, -0x50,0x00,0x00,0x00,0xa7,0x00,0x00,0x00,0x9d,0x00,0x00,0x00, 
-0x00,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, -0xa8,0x00,0x00,0x00,0xa7,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0xa9,0x00,0x00,0x00,0xaa,0x00,0x00,0x00,0xa1,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xa4,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0xaa,0x00,0x00,0x00,0xa8,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xae,0x00,0x00,0x00,0xa4,0x00,0x00,0x00, -0x54,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x50,0x00,0x00,0x00, -0xb0,0x00,0x00,0x00,0x9d,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0xb1,0x00,0x00,0x00, -0xb0,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0xa9,0x00,0x00,0x00, -0xb2,0x00,0x00,0x00,0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xae,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0xb2,0x00,0x00,0x00, -0xb1,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0xb5,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0xb5,0x00,0x00,0x00,0xfd,0x00,0x01,0x00, -0x38,0x00,0x01,0x00, -}; -const uint64_t get_rows_q5_1_f32_len = 2812; - -unsigned char get_rows_q5_1_f32_fp32_data[] = { -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0xb5,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, +0xc2,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, 0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00,0x0b,0x00,0x06,0x00, 0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c,0x2e,0x73,0x74,0x64, @@ -18560,7 +9562,7 @@ unsigned char get_rows_q5_1_f32_fp32_data[] = { 0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x0f,0x00,0x0a,0x00, 0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x6d,0x61,0x69,0x6e, 0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0xa1,0x00,0x00,0x00, +0x2d,0x00,0x00,0x00,0x5c,0x00,0x00,0x00,0xa5,0x00,0x00,0x00, 0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00,0x11,0x00,0x00,0x00, 0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00, 0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, @@ -18580,101 +9582,106 @@ unsigned char get_rows_q5_1_f32_fp32_data[] = { 
0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x2d,0x00,0x00,0x00, 0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, 0x2d,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x55,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x56,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x56,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x56,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x56,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x08,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x57,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x58,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00, +0x47,0x00,0x04,0x00,0x54,0x00,0x00,0x00,0x06,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x57,0x00,0x00,0x00, +0x06,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00, 0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x58,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x5a,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x5a,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x9e,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x9f,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x9f,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x9f,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0xa1,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0xa1,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0xb2,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00, 
-0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x0a,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x0a,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x0d,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x12,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x16,0x00,0x03,0x00,0x1c,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x1e,0x00,0x06,0x00,0x1d,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x1e,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x1d,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x1e,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, +0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x58,0x00,0x00,0x00, +0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x48,0x00,0x05,0x00,0x58,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x23,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x47,0x00,0x04,0x00, +0x59,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x48,0x00,0x04,0x00,0x5a,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x5a,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x47,0x00,0x03,0x00,0x5a,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x5c,0x00,0x00,0x00,0x22,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x5c,0x00,0x00,0x00, +0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, +0xa2,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00, 
+0x48,0x00,0x04,0x00,0xa3,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0xa3,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x47,0x00,0x03,0x00,0xa3,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0xa5,0x00,0x00,0x00,0x22,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0xa5,0x00,0x00,0x00, +0x21,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, +0xb6,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00, +0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00, +0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00, +0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x17,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00, +0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x0a,0x00,0x00,0x00, +0x01,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, +0x0a,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x0d,0x00,0x00,0x00, +0x01,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x15,0x00,0x04,0x00, 0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x14,0x00,0x02,0x00,0x24,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x2a,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x2b,0x00,0x00,0x00,0x2a,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x2c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2b,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x2c,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x30,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x49,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x16,0x00,0x03,0x00,0x52,0x00,0x00,0x00,0x10,0x00,0x00,0x00, 
-0x15,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x08,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x54,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x1c,0x00,0x04,0x00, -0x55,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x54,0x00,0x00,0x00, -0x1e,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0x52,0x00,0x00,0x00, -0x52,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x55,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x57,0x00,0x00,0x00,0x56,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x58,0x00,0x00,0x00,0x57,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x59,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x58,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x59,0x00,0x00,0x00, -0x5a,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x5c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x52,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x69,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x6e,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x76,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x7e,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x80,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x17,0x00,0x04,0x00, -0x84,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x88,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x9e,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x9f,0x00,0x00,0x00, -0x9e,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0xa0,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x9f,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0xa0,0x00,0x00,0x00,0xa1,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0xa8,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xb1,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x2c,0x00,0x06,0x00, -0x09,0x00,0x00,0x00,0xb2,0x00,0x00,0x00,0xb1,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x36,0x00,0x05,0x00, +0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x12,0x00,0x00,0x00, 
+0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x16,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x16,0x00,0x03,0x00, +0x1c,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x1e,0x00,0x06,0x00, +0x1d,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00, +0x1c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x1e,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x1d,0x00,0x00,0x00, +0x3b,0x00,0x04,0x00,0x1e,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, +0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, +0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00, +0x14,0x00,0x02,0x00,0x24,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, +0x2a,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, +0x2b,0x00,0x00,0x00,0x2a,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x2c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x00,0x00, +0x3b,0x00,0x04,0x00,0x2c,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, +0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x30,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x10,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x44,0x00,0x00,0x00, +0x20,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x49,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x16,0x00,0x03,0x00, +0x52,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x15,0x00,0x04,0x00, +0x53,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x1c,0x00,0x04,0x00,0x54,0x00,0x00,0x00,0x53,0x00,0x00,0x00, +0x49,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x55,0x00,0x00,0x00, +0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, +0x06,0x00,0x00,0x00,0x56,0x00,0x00,0x00,0x10,0x00,0x00,0x00, +0x1c,0x00,0x04,0x00,0x57,0x00,0x00,0x00,0x55,0x00,0x00,0x00, +0x56,0x00,0x00,0x00,0x1e,0x00,0x05,0x00,0x58,0x00,0x00,0x00, +0x52,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x57,0x00,0x00,0x00, +0x1d,0x00,0x03,0x00,0x59,0x00,0x00,0x00,0x58,0x00,0x00,0x00, +0x1e,0x00,0x03,0x00,0x5a,0x00,0x00,0x00,0x59,0x00,0x00,0x00, 
+0x20,0x00,0x04,0x00,0x5b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x5a,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x5b,0x00,0x00,0x00, +0x5c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x5e,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x52,0x00,0x00,0x00, +0x20,0x00,0x04,0x00,0x64,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x53,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, +0x68,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, +0x10,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0x04,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x7b,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x84,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x55,0x00,0x00,0x00,0x17,0x00,0x04,0x00, +0x88,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x8c,0x00,0x00,0x00, +0x0f,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, +0x9d,0x00,0x00,0x00,0x00,0x00,0x80,0x41,0x1d,0x00,0x03,0x00, +0xa2,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, +0xa3,0x00,0x00,0x00,0xa2,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0xa4,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0xa3,0x00,0x00,0x00, +0x3b,0x00,0x04,0x00,0xa4,0x00,0x00,0x00,0xa5,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0xac,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, +0x06,0x00,0x00,0x00,0xb5,0x00,0x00,0x00,0x00,0x02,0x00,0x00, +0x2c,0x00,0x06,0x00,0x09,0x00,0x00,0x00,0xb6,0x00,0x00,0x00, +0xb5,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x2c,0x00,0x05,0x00,0x88,0x00,0x00,0x00,0xc1,0x00,0x00,0x00, +0x9d,0x00,0x00,0x00,0x9d,0x00,0x00,0x00,0x36,0x00,0x05,0x00, 0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0xb3,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00,0xb4,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0xb4,0x00,0x00,0x00,0x41,0x00,0x05,0x00, +0xf7,0x00,0x03,0x00,0xb7,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 
+0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00,0xb8,0x00,0x00,0x00, +0xf8,0x00,0x02,0x00,0xb8,0x00,0x00,0x00,0x41,0x00,0x05,0x00, 0x0d,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, 0x0c,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, 0x0f,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, @@ -18695,7 +9702,7 @@ unsigned char get_rows_q5_1_f32_fp32_data[] = { 0x23,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x27,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x25,0x00,0x00,0x00, 0x26,0x00,0x00,0x00,0x27,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x26,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0xb3,0x00,0x00,0x00, +0x26,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0xb7,0x00,0x00,0x00, 0xf8,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x41,0x00,0x06,0x00, 0x30,0x00,0x00,0x00,0x31,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, 0x2e,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, @@ -18716,77 +9723,80 @@ unsigned char get_rows_q5_1_f32_fp32_data[] = { 0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4e,0x00,0x00,0x00, 0x41,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x82,0x00,0x05,0x00, 0x06,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x41,0x00,0x07,0x00,0x5c,0x00,0x00,0x00, -0x5d,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x4e,0x00,0x00,0x00,0x41,0x00,0x07,0x00,0x5e,0x00,0x00,0x00, +0x5f,0x00,0x00,0x00,0x5c,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, 0x45,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x52,0x00,0x00,0x00,0x5e,0x00,0x00,0x00,0x5d,0x00,0x00,0x00, -0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x5f,0x00,0x00,0x00, -0x5e,0x00,0x00,0x00,0x41,0x00,0x07,0x00,0x5c,0x00,0x00,0x00, -0x62,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x52,0x00,0x00,0x00,0x63,0x00,0x00,0x00,0x62,0x00,0x00,0x00, -0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x64,0x00,0x00,0x00, -0x63,0x00,0x00,0x00,0x41,0x00,0x07,0x00,0x69,0x00,0x00,0x00, -0x6a,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, 
-0x45,0x00,0x00,0x00,0x12,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x6b,0x00,0x00,0x00,0x6a,0x00,0x00,0x00, -0xc2,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x6d,0x00,0x00,0x00, -0x6b,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0xc4,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x6f,0x00,0x00,0x00,0x6d,0x00,0x00,0x00, -0x6e,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x70,0x00,0x00,0x00,0x6f,0x00,0x00,0x00,0x54,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x71,0x00,0x00,0x00, -0x70,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x74,0x00,0x00,0x00,0x6a,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0x76,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x78,0x00,0x00,0x00,0x74,0x00,0x00,0x00,0x77,0x00,0x00,0x00, -0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x79,0x00,0x00,0x00, -0x78,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x7a,0x00,0x00,0x00,0x79,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x80,0x00,0x00,0x00,0x81,0x00,0x00,0x00, -0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x7e,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x53,0x00,0x00,0x00,0x82,0x00,0x00,0x00,0x81,0x00,0x00,0x00, -0x71,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x83,0x00,0x00,0x00, -0x82,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x89,0x00,0x00,0x00,0x83,0x00,0x00,0x00,0x88,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x8d,0x00,0x00,0x00, -0x71,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x8e,0x00,0x00,0x00,0x89,0x00,0x00,0x00,0x8d,0x00,0x00,0x00, -0x70,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x8f,0x00,0x00,0x00, -0x8e,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x91,0x00,0x00,0x00,0x83,0x00,0x00,0x00,0x6e,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x94,0x00,0x00,0x00, -0x7a,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x95,0x00,0x00,0x00,0x91,0x00,0x00,0x00,0x94,0x00,0x00,0x00, 
-0x70,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x96,0x00,0x00,0x00, -0x95,0x00,0x00,0x00,0x50,0x00,0x05,0x00,0x84,0x00,0x00,0x00, -0x97,0x00,0x00,0x00,0x8f,0x00,0x00,0x00,0x96,0x00,0x00,0x00, -0x8e,0x00,0x05,0x00,0x84,0x00,0x00,0x00,0x9a,0x00,0x00,0x00, -0x97,0x00,0x00,0x00,0x5f,0x00,0x00,0x00,0x50,0x00,0x05,0x00, -0x84,0x00,0x00,0x00,0x9c,0x00,0x00,0x00,0x64,0x00,0x00,0x00, -0x64,0x00,0x00,0x00,0x81,0x00,0x05,0x00,0x84,0x00,0x00,0x00, -0x9d,0x00,0x00,0x00,0x9a,0x00,0x00,0x00,0x9c,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xa4,0x00,0x00,0x00, -0x4f,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0x51,0x00,0x05,0x00, -0x1c,0x00,0x00,0x00,0xa7,0x00,0x00,0x00,0x9d,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0xa8,0x00,0x00,0x00, -0xa9,0x00,0x00,0x00,0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xa4,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0xa9,0x00,0x00,0x00, -0xa7,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xad,0x00,0x00,0x00,0xa4,0x00,0x00,0x00,0x54,0x00,0x00,0x00, -0x51,0x00,0x05,0x00,0x1c,0x00,0x00,0x00,0xaf,0x00,0x00,0x00, -0x9d,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0xa8,0x00,0x00,0x00,0xb0,0x00,0x00,0x00,0xa1,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xad,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0xb0,0x00,0x00,0x00,0xaf,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0xb3,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0xb3,0x00,0x00,0x00, -0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, -}; -const uint64_t get_rows_q5_1_f32_fp32_len = 2780; +0x52,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x5f,0x00,0x00,0x00, +0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x61,0x00,0x00,0x00, +0x60,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x64,0x00,0x00,0x00, +0x65,0x00,0x00,0x00,0x5c,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x45,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00, +0x3d,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x66,0x00,0x00,0x00, +0x65,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x67,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0xc4,0x00,0x05,0x00, 
+0x06,0x00,0x00,0x00,0x69,0x00,0x00,0x00,0x67,0x00,0x00,0x00, +0x68,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x64,0x00,0x00,0x00, +0x6b,0x00,0x00,0x00,0x5c,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x45,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x3d,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x6c,0x00,0x00,0x00, +0x6b,0x00,0x00,0x00,0x71,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x6d,0x00,0x00,0x00,0x6c,0x00,0x00,0x00,0xc5,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x6e,0x00,0x00,0x00,0x69,0x00,0x00,0x00, +0x6d,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x74,0x00,0x00,0x00,0x6e,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, +0xc4,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x76,0x00,0x00,0x00, +0x74,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0xc7,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x76,0x00,0x00,0x00, +0x56,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00, +0x78,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x80,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x7c,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, +0x7b,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x7d,0x00,0x00,0x00,0x6e,0x00,0x00,0x00,0x7c,0x00,0x00,0x00, +0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x7e,0x00,0x00,0x00, +0x7d,0x00,0x00,0x00,0x56,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, +0x10,0x00,0x00,0x00,0x7f,0x00,0x00,0x00,0x7e,0x00,0x00,0x00, +0x41,0x00,0x08,0x00,0x84,0x00,0x00,0x00,0x85,0x00,0x00,0x00, +0x5c,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00, +0x12,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, +0x55,0x00,0x00,0x00,0x86,0x00,0x00,0x00,0x85,0x00,0x00,0x00, +0x71,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x87,0x00,0x00,0x00, +0x86,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x8d,0x00,0x00,0x00,0x87,0x00,0x00,0x00,0x8c,0x00,0x00,0x00, +0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x91,0x00,0x00,0x00, +0x78,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x92,0x00,0x00,0x00,0x8d,0x00,0x00,0x00,0x91,0x00,0x00,0x00, +0x70,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x93,0x00,0x00,0x00, 
+0x92,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x95,0x00,0x00,0x00,0x87,0x00,0x00,0x00,0x75,0x00,0x00,0x00, +0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x98,0x00,0x00,0x00, +0x7f,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x99,0x00,0x00,0x00,0x95,0x00,0x00,0x00,0x98,0x00,0x00,0x00, +0x70,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x9a,0x00,0x00,0x00, +0x99,0x00,0x00,0x00,0x50,0x00,0x05,0x00,0x88,0x00,0x00,0x00, +0x9b,0x00,0x00,0x00,0x93,0x00,0x00,0x00,0x9a,0x00,0x00,0x00, +0x83,0x00,0x05,0x00,0x88,0x00,0x00,0x00,0x9f,0x00,0x00,0x00, +0x9b,0x00,0x00,0x00,0xc1,0x00,0x00,0x00,0x8e,0x00,0x05,0x00, +0x88,0x00,0x00,0x00,0xa1,0x00,0x00,0x00,0x9f,0x00,0x00,0x00, +0x61,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0xa8,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, +0x51,0x00,0x05,0x00,0x1c,0x00,0x00,0x00,0xab,0x00,0x00,0x00, +0xa1,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, +0xac,0x00,0x00,0x00,0xad,0x00,0x00,0x00,0xa5,0x00,0x00,0x00, +0x2e,0x00,0x00,0x00,0xa8,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, +0xad,0x00,0x00,0x00,0xab,0x00,0x00,0x00,0x80,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0xb1,0x00,0x00,0x00,0xa8,0x00,0x00,0x00, +0x56,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x1c,0x00,0x00,0x00, +0xb3,0x00,0x00,0x00,0xa1,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x41,0x00,0x06,0x00,0xac,0x00,0x00,0x00,0xb4,0x00,0x00,0x00, +0xa5,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xb1,0x00,0x00,0x00, +0x3e,0x00,0x03,0x00,0xb4,0x00,0x00,0x00,0xb3,0x00,0x00,0x00, +0xf9,0x00,0x02,0x00,0xb7,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, +0xb7,0x00,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, -unsigned char get_rows_q5_1_fp32_data[] = { +}; +const uint64_t get_rows_q5_0_f32_len = 2868; + +unsigned char get_rows_q5_1_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, 0xb6,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, @@ -19022,61 +10032,63 @@ unsigned char get_rows_q5_1_fp32_data[] = 
{ 0xb4,0x00,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, }; -const uint64_t get_rows_q5_1_fp32_len = 2796; +const uint64_t get_rows_q5_1_len = 2796; -unsigned char get_rows_q8_0_data[] = { +unsigned char get_rows_q5_1_f32_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0x86,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, -0x11,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x51,0x11,0x00,0x00,0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00, -0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, -0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, -0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x0f,0x00,0x0a,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x58,0x00,0x00,0x00, -0x73,0x00,0x00,0x00,0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x1d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x1d,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x1d,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x2a,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x2b,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, 
-0x2d,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x2d,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x53,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x54,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x54,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x55,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x56,0x00,0x00,0x00, +0xb5,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, +0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, +0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00,0x0b,0x00,0x06,0x00, +0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c,0x2e,0x73,0x74,0x64, +0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00,0x0e,0x00,0x03,0x00, +0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x0f,0x00,0x0a,0x00, +0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x6d,0x61,0x69,0x6e, +0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, +0x2d,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0xa1,0x00,0x00,0x00, +0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00,0x11,0x00,0x00,0x00, +0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, +0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x05,0x00, +0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x23,0x00,0x00,0x00, +0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, +0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x47,0x00,0x03,0x00,0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x2a,0x00,0x00,0x00,0x06,0x00,0x00,0x00, +0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x2b,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00, 
-0x56,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x56,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x58,0x00,0x00,0x00, +0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x2b,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x2d,0x00,0x00,0x00, 0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x58,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x70,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x71,0x00,0x00,0x00, +0x2d,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x55,0x00,0x00,0x00,0x06,0x00,0x00,0x00, +0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x56,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x48,0x00,0x05,0x00,0x56,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x48,0x00,0x05,0x00, +0x56,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x23,0x00,0x00,0x00, +0x04,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x56,0x00,0x00,0x00, +0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x08,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x57,0x00,0x00,0x00,0x06,0x00,0x00,0x00, +0x18,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x58,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00, +0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x58,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x5a,0x00,0x00,0x00, +0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, +0x5a,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x9e,0x00,0x00,0x00,0x06,0x00,0x00,0x00, +0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x9f,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x71,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x71,0x00,0x00,0x00, 
-0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x73,0x00,0x00,0x00, +0x9f,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x9f,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0xa1,0x00,0x00,0x00, 0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x73,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x83,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, +0xa1,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0xb2,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, 0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00, 0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00, 0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00, @@ -19108,226 +10120,45 @@ unsigned char get_rows_q8_0_data[] = { 0x10,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x20,0x00,0x04,0x00,0x30,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, 0x10,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x16,0x00,0x03,0x00, -0x4f,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x15,0x00,0x04,0x00, -0x52,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x1c,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x52,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x1e,0x00,0x04,0x00,0x54,0x00,0x00,0x00, -0x4f,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, -0x55,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0x56,0x00,0x00,0x00,0x55,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x57,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x56,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x57,0x00,0x00,0x00,0x58,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x5a,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x17,0x00,0x04,0x00, -0x5d,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x62,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x52,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x70,0x00,0x00,0x00, -0x4f,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x71,0x00,0x00,0x00, 
-0x70,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x72,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x71,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x72,0x00,0x00,0x00,0x73,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x82,0x00,0x00,0x00, -0x00,0x02,0x00,0x00,0x2c,0x00,0x06,0x00,0x09,0x00,0x00,0x00, -0x83,0x00,0x00,0x00,0x82,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, -0x84,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00, -0x0c,0x00,0x00,0x00,0x85,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x85,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x0d,0x00,0x00,0x00, -0x0e,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, -0x0e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x10,0x00,0x00,0x00,0x13,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x12,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x14,0x00,0x00,0x00,0x13,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x0d,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x21,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x1f,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0xae,0x00,0x05,0x00,0x24,0x00,0x00,0x00, -0x25,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0x27,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfa,0x00,0x04,0x00,0x25,0x00,0x00,0x00,0x26,0x00,0x00,0x00, -0x27,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x26,0x00,0x00,0x00, 
-0xf9,0x00,0x02,0x00,0x84,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x27,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x30,0x00,0x00,0x00, -0x31,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x1a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x32,0x00,0x00,0x00,0x31,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x32,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x38,0x00,0x00,0x00, -0x33,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x38,0x00,0x00,0x00, -0x14,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x3f,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x3f,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x86,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x45,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x48,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x44,0x00,0x00,0x00, -0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x49,0x00,0x00,0x00, -0x48,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x89,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x4d,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x82,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x4d,0x00,0x00,0x00, -0x41,0x00,0x07,0x00,0x5a,0x00,0x00,0x00,0x5b,0x00,0x00,0x00, -0x58,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4f,0x00,0x00,0x00, -0x5c,0x00,0x00,0x00,0x5b,0x00,0x00,0x00,0x41,0x00,0x08,0x00, -0x62,0x00,0x00,0x00,0x63,0x00,0x00,0x00,0x58,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x49,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x52,0x00,0x00,0x00, -0x64,0x00,0x00,0x00,0x63,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4f,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x64,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x68,0x00,0x00,0x00, -0x49,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x41,0x00,0x08,0x00, 
-0x62,0x00,0x00,0x00,0x69,0x00,0x00,0x00,0x58,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x68,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x52,0x00,0x00,0x00, -0x6a,0x00,0x00,0x00,0x69,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x4f,0x00,0x00,0x00,0x6b,0x00,0x00,0x00,0x6a,0x00,0x00,0x00, -0x50,0x00,0x05,0x00,0x5d,0x00,0x00,0x00,0x6c,0x00,0x00,0x00, -0x65,0x00,0x00,0x00,0x6b,0x00,0x00,0x00,0x8e,0x00,0x05,0x00, -0x5d,0x00,0x00,0x00,0x6f,0x00,0x00,0x00,0x6c,0x00,0x00,0x00, -0x5c,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x76,0x00,0x00,0x00,0x4e,0x00,0x00,0x00,0x49,0x00,0x00,0x00, -0x51,0x00,0x05,0x00,0x4f,0x00,0x00,0x00,0x79,0x00,0x00,0x00, -0x6f,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x5a,0x00,0x00,0x00,0x7a,0x00,0x00,0x00,0x73,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x76,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0x7a,0x00,0x00,0x00,0x79,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,0x76,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x4f,0x00,0x00,0x00, -0x80,0x00,0x00,0x00,0x6f,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x5a,0x00,0x00,0x00,0x81,0x00,0x00,0x00, -0x73,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0x81,0x00,0x00,0x00,0x80,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0x84,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x84,0x00,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, - -}; -const uint64_t get_rows_q8_0_len = 2232; - -unsigned char get_rows_q8_0_f32_data[] = { -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0x89,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, -0x11,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x51,0x11,0x00,0x00,0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00, -0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, -0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, 
-0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x0f,0x00,0x0a,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x58,0x00,0x00,0x00, -0x73,0x00,0x00,0x00,0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00, -0x0b,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x1d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x1d,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x1d,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x2a,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x2b,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x2d,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x2d,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x53,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x54,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x54,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x55,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x56,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x56,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, 
-0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x56,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x58,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x58,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x70,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x71,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x71,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x71,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x73,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x73,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x86,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00, -0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x0a,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x0a,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x0d,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x12,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x16,0x00,0x03,0x00,0x1c,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x1e,0x00,0x06,0x00,0x1d,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x1e,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x1d,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x1e,0x00,0x00,0x00, 
-0x1f,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x14,0x00,0x02,0x00,0x24,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x2a,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x2b,0x00,0x00,0x00,0x2a,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x2c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2b,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x2c,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x30,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x16,0x00,0x03,0x00, -0x4f,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x15,0x00,0x04,0x00, -0x52,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x1c,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x52,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x1e,0x00,0x04,0x00,0x54,0x00,0x00,0x00, -0x4f,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, -0x55,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0x56,0x00,0x00,0x00,0x55,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x57,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x56,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x57,0x00,0x00,0x00,0x58,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x5a,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x17,0x00,0x04,0x00, -0x5d,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x62,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x52,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x70,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x71,0x00,0x00,0x00, -0x70,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x72,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x71,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x72,0x00,0x00,0x00,0x73,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x7b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, 
+0x44,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, +0x06,0x00,0x00,0x00,0x49,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x16,0x00,0x03,0x00,0x52,0x00,0x00,0x00,0x10,0x00,0x00,0x00, +0x15,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x08,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x54,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x1c,0x00,0x04,0x00, +0x55,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x54,0x00,0x00,0x00, +0x1e,0x00,0x06,0x00,0x56,0x00,0x00,0x00,0x52,0x00,0x00,0x00, +0x52,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x55,0x00,0x00,0x00, +0x1d,0x00,0x03,0x00,0x57,0x00,0x00,0x00,0x56,0x00,0x00,0x00, +0x1e,0x00,0x03,0x00,0x58,0x00,0x00,0x00,0x57,0x00,0x00,0x00, +0x20,0x00,0x04,0x00,0x59,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x58,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x59,0x00,0x00,0x00, +0x5a,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x5c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x52,0x00,0x00,0x00, +0x20,0x00,0x04,0x00,0x69,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x06,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, +0x6e,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, +0x06,0x00,0x00,0x00,0x76,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x7e,0x00,0x00,0x00, +0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x80,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x17,0x00,0x04,0x00, +0x84,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x88,0x00,0x00,0x00, +0x0f,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x9e,0x00,0x00,0x00, +0x1c,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x9f,0x00,0x00,0x00, +0x9e,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0xa0,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x9f,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, +0xa0,0x00,0x00,0x00,0xa1,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x20,0x00,0x04,0x00,0xa8,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, 0x1c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x85,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x2c,0x00,0x06,0x00, 
-0x09,0x00,0x00,0x00,0x86,0x00,0x00,0x00,0x85,0x00,0x00,0x00, +0xb1,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x2c,0x00,0x06,0x00, +0x09,0x00,0x00,0x00,0xb2,0x00,0x00,0x00,0xb1,0x00,0x00,0x00, 0x16,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x36,0x00,0x05,0x00, 0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0x87,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00,0x88,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x88,0x00,0x00,0x00,0x41,0x00,0x05,0x00, +0xf7,0x00,0x03,0x00,0xb3,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00,0xb4,0x00,0x00,0x00, +0xf8,0x00,0x02,0x00,0xb4,0x00,0x00,0x00,0x41,0x00,0x05,0x00, 0x0d,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, 0x0c,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, 0x0f,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, @@ -19348,7 +10179,7 @@ unsigned char get_rows_q8_0_f32_data[] = { 0x23,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x27,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x25,0x00,0x00,0x00, 0x26,0x00,0x00,0x00,0x27,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x26,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x87,0x00,0x00,0x00, +0x26,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0xb3,0x00,0x00,0x00, 0xf8,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x41,0x00,0x06,0x00, 0x30,0x00,0x00,0x00,0x31,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, 0x2e,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, @@ -19365,247 +10196,81 @@ unsigned char get_rows_q8_0_f32_data[] = { 0x3a,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x89,0x00,0x05,0x00, 0x06,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, 0x44,0x00,0x00,0x00,0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x49,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4d,0x00,0x00,0x00, +0x4a,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x49,0x00,0x00,0x00, +0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4e,0x00,0x00,0x00, 
0x41,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x82,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x4e,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x4d,0x00,0x00,0x00,0x41,0x00,0x07,0x00,0x5a,0x00,0x00,0x00, -0x5b,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x06,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x41,0x00,0x00,0x00, +0x4e,0x00,0x00,0x00,0x41,0x00,0x07,0x00,0x5c,0x00,0x00,0x00, +0x5d,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, 0x45,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4f,0x00,0x00,0x00,0x5c,0x00,0x00,0x00,0x5b,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x62,0x00,0x00,0x00,0x63,0x00,0x00,0x00, -0x58,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x49,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x52,0x00,0x00,0x00,0x64,0x00,0x00,0x00,0x63,0x00,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4f,0x00,0x00,0x00,0x65,0x00,0x00,0x00, -0x64,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x68,0x00,0x00,0x00,0x49,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x62,0x00,0x00,0x00,0x69,0x00,0x00,0x00, -0x58,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x68,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x52,0x00,0x00,0x00,0x6a,0x00,0x00,0x00,0x69,0x00,0x00,0x00, -0x6f,0x00,0x04,0x00,0x4f,0x00,0x00,0x00,0x6b,0x00,0x00,0x00, -0x6a,0x00,0x00,0x00,0x50,0x00,0x05,0x00,0x5d,0x00,0x00,0x00, -0x6c,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x6b,0x00,0x00,0x00, -0x8e,0x00,0x05,0x00,0x5d,0x00,0x00,0x00,0x6f,0x00,0x00,0x00, -0x6c,0x00,0x00,0x00,0x5c,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x76,0x00,0x00,0x00,0x4e,0x00,0x00,0x00, -0x49,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x4f,0x00,0x00,0x00, -0x79,0x00,0x00,0x00,0x6f,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x7a,0x00,0x00,0x00, -0x79,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x7b,0x00,0x00,0x00, -0x7c,0x00,0x00,0x00,0x73,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x76,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0x7c,0x00,0x00,0x00, 
-0x7a,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x80,0x00,0x00,0x00,0x76,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x51,0x00,0x05,0x00,0x4f,0x00,0x00,0x00,0x82,0x00,0x00,0x00, -0x6f,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x73,0x00,0x04,0x00, -0x1c,0x00,0x00,0x00,0x83,0x00,0x00,0x00,0x82,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x7b,0x00,0x00,0x00,0x84,0x00,0x00,0x00, -0x73,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x80,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0x84,0x00,0x00,0x00,0x83,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0x87,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x87,0x00,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, - +0x52,0x00,0x00,0x00,0x5e,0x00,0x00,0x00,0x5d,0x00,0x00,0x00, +0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x5f,0x00,0x00,0x00, +0x5e,0x00,0x00,0x00,0x41,0x00,0x07,0x00,0x5c,0x00,0x00,0x00, +0x62,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x45,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, +0x52,0x00,0x00,0x00,0x63,0x00,0x00,0x00,0x62,0x00,0x00,0x00, +0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x64,0x00,0x00,0x00, +0x63,0x00,0x00,0x00,0x41,0x00,0x07,0x00,0x69,0x00,0x00,0x00, +0x6a,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x45,0x00,0x00,0x00,0x12,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, +0x06,0x00,0x00,0x00,0x6b,0x00,0x00,0x00,0x6a,0x00,0x00,0x00, +0xc2,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x6d,0x00,0x00,0x00, +0x6b,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0xc4,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x6f,0x00,0x00,0x00,0x6d,0x00,0x00,0x00, +0x6e,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x70,0x00,0x00,0x00,0x6f,0x00,0x00,0x00,0x54,0x00,0x00,0x00, +0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x71,0x00,0x00,0x00, +0x70,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x74,0x00,0x00,0x00,0x6a,0x00,0x00,0x00,0x80,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, +0x76,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x78,0x00,0x00,0x00,0x74,0x00,0x00,0x00,0x77,0x00,0x00,0x00, 
+0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x79,0x00,0x00,0x00, +0x78,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, +0x10,0x00,0x00,0x00,0x7a,0x00,0x00,0x00,0x79,0x00,0x00,0x00, +0x41,0x00,0x08,0x00,0x80,0x00,0x00,0x00,0x81,0x00,0x00,0x00, +0x5a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00, +0x7e,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, +0x53,0x00,0x00,0x00,0x82,0x00,0x00,0x00,0x81,0x00,0x00,0x00, +0x71,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x83,0x00,0x00,0x00, +0x82,0x00,0x00,0x00,0xc7,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x89,0x00,0x00,0x00,0x83,0x00,0x00,0x00,0x88,0x00,0x00,0x00, +0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x8d,0x00,0x00,0x00, +0x71,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x8e,0x00,0x00,0x00,0x89,0x00,0x00,0x00,0x8d,0x00,0x00,0x00, +0x70,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x8f,0x00,0x00,0x00, +0x8e,0x00,0x00,0x00,0xc2,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x91,0x00,0x00,0x00,0x83,0x00,0x00,0x00,0x6e,0x00,0x00,0x00, +0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x94,0x00,0x00,0x00, +0x7a,0x00,0x00,0x00,0xc5,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x95,0x00,0x00,0x00,0x91,0x00,0x00,0x00,0x94,0x00,0x00,0x00, +0x70,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x96,0x00,0x00,0x00, +0x95,0x00,0x00,0x00,0x50,0x00,0x05,0x00,0x84,0x00,0x00,0x00, +0x97,0x00,0x00,0x00,0x8f,0x00,0x00,0x00,0x96,0x00,0x00,0x00, +0x8e,0x00,0x05,0x00,0x84,0x00,0x00,0x00,0x9a,0x00,0x00,0x00, +0x97,0x00,0x00,0x00,0x5f,0x00,0x00,0x00,0x50,0x00,0x05,0x00, +0x84,0x00,0x00,0x00,0x9c,0x00,0x00,0x00,0x64,0x00,0x00,0x00, +0x64,0x00,0x00,0x00,0x81,0x00,0x05,0x00,0x84,0x00,0x00,0x00, +0x9d,0x00,0x00,0x00,0x9a,0x00,0x00,0x00,0x9c,0x00,0x00,0x00, +0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xa4,0x00,0x00,0x00, +0x4f,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0x51,0x00,0x05,0x00, +0x1c,0x00,0x00,0x00,0xa7,0x00,0x00,0x00,0x9d,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0xa8,0x00,0x00,0x00, +0xa9,0x00,0x00,0x00,0xa1,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, 
+0xa4,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0xa9,0x00,0x00,0x00, +0xa7,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0xad,0x00,0x00,0x00,0xa4,0x00,0x00,0x00,0x54,0x00,0x00,0x00, +0x51,0x00,0x05,0x00,0x1c,0x00,0x00,0x00,0xaf,0x00,0x00,0x00, +0x9d,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x00,0x06,0x00, +0xa8,0x00,0x00,0x00,0xb0,0x00,0x00,0x00,0xa1,0x00,0x00,0x00, +0x2e,0x00,0x00,0x00,0xad,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, +0xb0,0x00,0x00,0x00,0xaf,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, +0xb3,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0xb3,0x00,0x00,0x00, +0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, }; -const uint64_t get_rows_q8_0_f32_len = 2280; +const uint64_t get_rows_q5_1_f32_len = 2780; -unsigned char get_rows_q8_0_f32_fp32_data[] = { -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0x8a,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, -0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00,0x0b,0x00,0x06,0x00, -0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c,0x2e,0x73,0x74,0x64, -0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00,0x0e,0x00,0x03,0x00, -0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x0f,0x00,0x0a,0x00, -0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x6d,0x61,0x69,0x6e, -0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x76,0x00,0x00,0x00, -0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, 
-0x47,0x00,0x03,0x00,0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x2a,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x2b,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x2b,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x2d,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x2d,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x54,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x54,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x55,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x48,0x00,0x04,0x00,0x56,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x56,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x56,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x58,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x58,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x73,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x48,0x00,0x04,0x00,0x74,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x74,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x74,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x76,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x76,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x87,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00, 
-0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x17,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x0a,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x0a,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x0d,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x15,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x12,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x16,0x00,0x03,0x00, -0x1c,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x1e,0x00,0x06,0x00, -0x1d,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x1e,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x1d,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x1e,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x14,0x00,0x02,0x00,0x24,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, -0x2a,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0x2b,0x00,0x00,0x00,0x2a,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x2c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x2c,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x30,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x44,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x16,0x00,0x03,0x00,0x51,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x52,0x00,0x00,0x00, 
-0x08,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x1c,0x00,0x04,0x00, -0x53,0x00,0x00,0x00,0x52,0x00,0x00,0x00,0x44,0x00,0x00,0x00, -0x1e,0x00,0x04,0x00,0x54,0x00,0x00,0x00,0x51,0x00,0x00,0x00, -0x53,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x55,0x00,0x00,0x00, -0x54,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x56,0x00,0x00,0x00, -0x55,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x57,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x56,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x57,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x5a,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x51,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x5e,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x63,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x52,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x73,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x74,0x00,0x00,0x00,0x73,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x75,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x74,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x75,0x00,0x00,0x00, -0x76,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x7d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x86,0x00,0x00,0x00, -0x00,0x02,0x00,0x00,0x2c,0x00,0x06,0x00,0x09,0x00,0x00,0x00, -0x87,0x00,0x00,0x00,0x86,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, -0x88,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00, -0x0c,0x00,0x00,0x00,0x89,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x89,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x0d,0x00,0x00,0x00, -0x0e,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, -0x0e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x10,0x00,0x00,0x00,0x13,0x00,0x00,0x00,0x11,0x00,0x00,0x00, 
-0x12,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x14,0x00,0x00,0x00,0x13,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x0d,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x21,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x1f,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0xae,0x00,0x05,0x00,0x24,0x00,0x00,0x00, -0x25,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0x27,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfa,0x00,0x04,0x00,0x25,0x00,0x00,0x00,0x26,0x00,0x00,0x00, -0x27,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x26,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0x88,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x27,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x30,0x00,0x00,0x00, -0x31,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x1a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x32,0x00,0x00,0x00,0x31,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x32,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x38,0x00,0x00,0x00, -0x33,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x38,0x00,0x00,0x00, -0x14,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x3f,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x3f,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x86,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x45,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x48,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x44,0x00,0x00,0x00, -0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x49,0x00,0x00,0x00, 
-0x48,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x89,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x4d,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x82,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x4d,0x00,0x00,0x00, -0x41,0x00,0x07,0x00,0x5a,0x00,0x00,0x00,0x5b,0x00,0x00,0x00, -0x58,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x51,0x00,0x00,0x00, -0x5c,0x00,0x00,0x00,0x5b,0x00,0x00,0x00,0x73,0x00,0x04,0x00, -0x1c,0x00,0x00,0x00,0x5d,0x00,0x00,0x00,0x5c,0x00,0x00,0x00, -0x41,0x00,0x08,0x00,0x63,0x00,0x00,0x00,0x64,0x00,0x00,0x00, -0x58,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x49,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x52,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x64,0x00,0x00,0x00, -0x72,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x66,0x00,0x00,0x00, -0x65,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, -0x67,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x6a,0x00,0x00,0x00,0x49,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x63,0x00,0x00,0x00, -0x6b,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x6a,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x52,0x00,0x00,0x00,0x6c,0x00,0x00,0x00, -0x6b,0x00,0x00,0x00,0x72,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x6d,0x00,0x00,0x00,0x6c,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, -0x1c,0x00,0x00,0x00,0x6e,0x00,0x00,0x00,0x6d,0x00,0x00,0x00, -0x50,0x00,0x05,0x00,0x5e,0x00,0x00,0x00,0x6f,0x00,0x00,0x00, -0x67,0x00,0x00,0x00,0x6e,0x00,0x00,0x00,0x8e,0x00,0x05,0x00, -0x5e,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0x6f,0x00,0x00,0x00, -0x5d,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x79,0x00,0x00,0x00,0x4e,0x00,0x00,0x00,0x49,0x00,0x00,0x00, -0x51,0x00,0x05,0x00,0x1c,0x00,0x00,0x00,0x7c,0x00,0x00,0x00, -0x72,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x7d,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,0x76,0x00,0x00,0x00, 
-0x2e,0x00,0x00,0x00,0x79,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0x7e,0x00,0x00,0x00,0x7c,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x82,0x00,0x00,0x00,0x79,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x1c,0x00,0x00,0x00, -0x84,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x7d,0x00,0x00,0x00,0x85,0x00,0x00,0x00, -0x76,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x82,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0x85,0x00,0x00,0x00,0x84,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0x88,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x88,0x00,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, - -}; -const uint64_t get_rows_q8_0_f32_fp32_len = 2280; - -unsigned char get_rows_q8_0_fp32_data[] = { +unsigned char get_rows_q8_0_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, 0x8b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, @@ -19799,7 +10464,202 @@ unsigned char get_rows_q8_0_fp32_data[] = { 0xf8,0x00,0x02,0x00,0x89,0x00,0x00,0x00,0xfd,0x00,0x01,0x00, 0x38,0x00,0x01,0x00, }; -const uint64_t get_rows_q8_0_fp32_len = 2296; +const uint64_t get_rows_q8_0_len = 2296; + +unsigned char get_rows_q8_0_f32_data[] = { +0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, +0x8a,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, +0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, +0x11,0x00,0x02,0x00,0x60,0x11,0x00,0x00,0x0b,0x00,0x06,0x00, +0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c,0x2e,0x73,0x74,0x64, +0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00,0x0e,0x00,0x03,0x00, +0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x0f,0x00,0x0a,0x00, +0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x6d,0x61,0x69,0x6e, +0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, +0x2d,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x76,0x00,0x00,0x00, +0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00,0x11,0x00,0x00,0x00, +0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00, 
+0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, +0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x05,0x00, +0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x23,0x00,0x00,0x00, +0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, +0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x47,0x00,0x03,0x00,0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x2a,0x00,0x00,0x00,0x06,0x00,0x00,0x00, +0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x2b,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00, +0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x2b,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x2d,0x00,0x00,0x00, +0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, +0x2d,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x06,0x00,0x00,0x00, +0x01,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x54,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x48,0x00,0x05,0x00,0x54,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x23,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, +0x55,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x22,0x00,0x00,0x00, +0x48,0x00,0x04,0x00,0x56,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x56,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x47,0x00,0x03,0x00,0x56,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x58,0x00,0x00,0x00,0x22,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x58,0x00,0x00,0x00, +0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, +0x73,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00, +0x48,0x00,0x04,0x00,0x74,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 
+0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x74,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x47,0x00,0x03,0x00,0x74,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x76,0x00,0x00,0x00,0x22,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x76,0x00,0x00,0x00, +0x21,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, +0x87,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00, +0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00, +0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00, +0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x17,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00, +0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x0a,0x00,0x00,0x00, +0x01,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, +0x0a,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x0d,0x00,0x00,0x00, +0x01,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x15,0x00,0x04,0x00, +0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x12,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x16,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x16,0x00,0x03,0x00, +0x1c,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x1e,0x00,0x06,0x00, +0x1d,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x06,0x00,0x00,0x00, +0x1c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x1e,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x1d,0x00,0x00,0x00, +0x3b,0x00,0x04,0x00,0x1e,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, +0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, +0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00, +0x14,0x00,0x02,0x00,0x24,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, +0x2a,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, +0x2b,0x00,0x00,0x00,0x2a,0x00,0x00,0x00,0x20,0x00,0x04,0x00, 
+0x2c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x00,0x00, +0x3b,0x00,0x04,0x00,0x2c,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, +0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x30,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x10,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x44,0x00,0x00,0x00, +0x20,0x00,0x00,0x00,0x16,0x00,0x03,0x00,0x51,0x00,0x00,0x00, +0x10,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x52,0x00,0x00,0x00, +0x08,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x1c,0x00,0x04,0x00, +0x53,0x00,0x00,0x00,0x52,0x00,0x00,0x00,0x44,0x00,0x00,0x00, +0x1e,0x00,0x04,0x00,0x54,0x00,0x00,0x00,0x51,0x00,0x00,0x00, +0x53,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x55,0x00,0x00,0x00, +0x54,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x56,0x00,0x00,0x00, +0x55,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x57,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x56,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, +0x57,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x20,0x00,0x04,0x00,0x5a,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x51,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x5e,0x00,0x00,0x00, +0x1c,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x63,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x52,0x00,0x00,0x00, +0x1d,0x00,0x03,0x00,0x73,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, +0x1e,0x00,0x03,0x00,0x74,0x00,0x00,0x00,0x73,0x00,0x00,0x00, +0x20,0x00,0x04,0x00,0x75,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x74,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x75,0x00,0x00,0x00, +0x76,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x7d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x86,0x00,0x00,0x00, +0x00,0x02,0x00,0x00,0x2c,0x00,0x06,0x00,0x09,0x00,0x00,0x00, +0x87,0x00,0x00,0x00,0x86,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x16,0x00,0x00,0x00,0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00, +0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00, +0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, 
+0x88,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00, +0x0c,0x00,0x00,0x00,0x89,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, +0x89,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x0d,0x00,0x00,0x00, +0x0e,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, +0x0e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00, +0x11,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x84,0x00,0x05,0x00, +0x10,0x00,0x00,0x00,0x13,0x00,0x00,0x00,0x11,0x00,0x00,0x00, +0x12,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x14,0x00,0x00,0x00,0x13,0x00,0x00,0x00,0x41,0x00,0x05,0x00, +0x0d,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, +0x16,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x18,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, +0x10,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x18,0x00,0x00,0x00, +0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, +0x19,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x21,0x00,0x00,0x00, +0x22,0x00,0x00,0x00,0x1f,0x00,0x00,0x00,0x20,0x00,0x00,0x00, +0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x23,0x00,0x00,0x00, +0x22,0x00,0x00,0x00,0xae,0x00,0x05,0x00,0x24,0x00,0x00,0x00, +0x25,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x23,0x00,0x00,0x00, +0xf7,0x00,0x03,0x00,0x27,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0xfa,0x00,0x04,0x00,0x25,0x00,0x00,0x00,0x26,0x00,0x00,0x00, +0x27,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x26,0x00,0x00,0x00, +0xf9,0x00,0x02,0x00,0x88,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, +0x27,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x30,0x00,0x00,0x00, +0x31,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x1a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x10,0x00,0x00,0x00, +0x32,0x00,0x00,0x00,0x31,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, +0x06,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x32,0x00,0x00,0x00, +0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x38,0x00,0x00,0x00, +0x33,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x80,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x38,0x00,0x00,0x00, 
+0x14,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x3f,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x23,0x00,0x00,0x00, +0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x41,0x00,0x00,0x00, +0x3f,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x86,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x45,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, +0x44,0x00,0x00,0x00,0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x48,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x44,0x00,0x00,0x00, +0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x49,0x00,0x00,0x00, +0x48,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x89,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x4d,0x00,0x00,0x00,0x41,0x00,0x00,0x00, +0x44,0x00,0x00,0x00,0x82,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x4e,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x4d,0x00,0x00,0x00, +0x41,0x00,0x07,0x00,0x5a,0x00,0x00,0x00,0x5b,0x00,0x00,0x00, +0x58,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00, +0x2e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x51,0x00,0x00,0x00, +0x5c,0x00,0x00,0x00,0x5b,0x00,0x00,0x00,0x73,0x00,0x04,0x00, +0x1c,0x00,0x00,0x00,0x5d,0x00,0x00,0x00,0x5c,0x00,0x00,0x00, +0x41,0x00,0x08,0x00,0x63,0x00,0x00,0x00,0x64,0x00,0x00,0x00, +0x58,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00, +0x20,0x00,0x00,0x00,0x49,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, +0x52,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x64,0x00,0x00,0x00, +0x72,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x66,0x00,0x00,0x00, +0x65,0x00,0x00,0x00,0x6f,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, +0x67,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0x80,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x6a,0x00,0x00,0x00,0x49,0x00,0x00,0x00, +0x16,0x00,0x00,0x00,0x41,0x00,0x08,0x00,0x63,0x00,0x00,0x00, +0x6b,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x45,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x6a,0x00,0x00,0x00, +0x3d,0x00,0x04,0x00,0x52,0x00,0x00,0x00,0x6c,0x00,0x00,0x00, +0x6b,0x00,0x00,0x00,0x72,0x00,0x04,0x00,0x10,0x00,0x00,0x00, +0x6d,0x00,0x00,0x00,0x6c,0x00,0x00,0x00,0x6f,0x00,0x04,0x00, +0x1c,0x00,0x00,0x00,0x6e,0x00,0x00,0x00,0x6d,0x00,0x00,0x00, 
+0x50,0x00,0x05,0x00,0x5e,0x00,0x00,0x00,0x6f,0x00,0x00,0x00, +0x67,0x00,0x00,0x00,0x6e,0x00,0x00,0x00,0x8e,0x00,0x05,0x00, +0x5e,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0x6f,0x00,0x00,0x00, +0x5d,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x79,0x00,0x00,0x00,0x4e,0x00,0x00,0x00,0x49,0x00,0x00,0x00, +0x51,0x00,0x05,0x00,0x1c,0x00,0x00,0x00,0x7c,0x00,0x00,0x00, +0x72,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x00,0x06,0x00, +0x7d,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,0x76,0x00,0x00,0x00, +0x2e,0x00,0x00,0x00,0x79,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, +0x7e,0x00,0x00,0x00,0x7c,0x00,0x00,0x00,0x80,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x82,0x00,0x00,0x00,0x79,0x00,0x00,0x00, +0x16,0x00,0x00,0x00,0x51,0x00,0x05,0x00,0x1c,0x00,0x00,0x00, +0x84,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x41,0x00,0x06,0x00,0x7d,0x00,0x00,0x00,0x85,0x00,0x00,0x00, +0x76,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x82,0x00,0x00,0x00, +0x3e,0x00,0x03,0x00,0x85,0x00,0x00,0x00,0x84,0x00,0x00,0x00, +0xf9,0x00,0x02,0x00,0x88,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, +0x88,0x00,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, + +}; +const uint64_t get_rows_q8_0_f32_len = 2280; unsigned char matmul_f16_aligned_l_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp index b1e0006bb..14fb89e09 100644 --- a/ggml-vulkan.cpp +++ b/ggml-vulkan.cpp @@ -1,6 +1,6 @@ #include "ggml-vulkan.h" -#ifdef VK_RUN_TESTS +#ifdef GGML_VULKAN_RUN_TESTS #include #endif @@ -255,6 +255,7 @@ static size_t vk_staging_offset; static vk_buffer vk_sync_staging; static vk_context * vk_ctx; +static vk_context * vk_transfer_ctx; static bool vk_disable; @@ -264,7 +265,7 @@ size_t vk_output_tensor; #endif static vk_pipeline ggml_vk_create_pipeline(const std::string& name, size_t spv_size, const void* spv_data, const std::string& entrypoint, uint32_t parameter_count, uint32_t push_constant_size, std::array wg_denoms, std::vector&& specialization_constants, 
uint32_t align) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_create_pipeline(" << name << ", " << entrypoint << ", " << parameter_count << ", " << push_constant_size << ", (" << wg_denoms[0] << "," << wg_denoms[1] << "," << wg_denoms[2] << "), specialization_constants, " << align << ")" << std::endl; #endif GGML_ASSERT(parameter_count > 0); @@ -368,7 +369,7 @@ static vk_pipeline ggml_vk_create_pipeline(const std::string& name, size_t spv_s } static void ggml_vk_pipeline_allocate_descriptor_sets(vk_pipeline& pipeline, uint32_t n) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_pipeline_allocate_descriptor_sets(" << pipeline.name << ", " << n << ")" << std::endl; #endif // Check if gc already contains pipeline before adding it @@ -413,14 +414,14 @@ static void ggml_vk_pipeline_allocate_descriptor_sets(vk_pipeline& pipeline, uin } static void ggml_vk_pipeline_cleanup(vk_pipeline& pipeline) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_pipeline_cleanup(" << pipeline.name << ")" << std::endl; #endif pipeline.descriptor_set_idx = 0; } static vk::CommandBuffer ggml_vk_create_cmd_buffer(vk_queue& q) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_create_cmd_buffer()" << std::endl; #endif if (q.cmd_buffers.size() > q.cmd_buffer_idx) { @@ -442,7 +443,7 @@ static vk::CommandBuffer ggml_vk_create_cmd_buffer(vk_queue& q) { } static vk_submission ggml_vk_create_submission(vk_queue& q, std::vector wait_semaphores, std::vector signal_semaphores) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_create_submission()" << std::endl; #endif vk_submission s; @@ -453,14 +454,14 @@ static vk_submission ggml_vk_create_submission(vk_queue& q, std::vector wait_semaphores, std::vector signal_semaphores) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_create_sequence_1()" << std::endl; #endif return { ggml_vk_create_submission(q, std::move(wait_semaphores), 
std::move(signal_semaphores)) }; } static void ggml_vk_submit(vk_context * ctx, vk::Fence fence) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_submit(" << ctx->seqs.size() << ", " << fence << ")" << std::endl; #endif if (ctx->seqs.empty()) { @@ -536,7 +537,7 @@ static void ggml_vk_submit(vk_context * ctx, vk::Fence fence) { } static uint32_t ggml_vk_find_queue_family_index(std::vector& queue_family_props, const vk::QueueFlags& required, const vk::QueueFlags& avoid, int32_t compute_index, uint32_t min_num_queues) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_find_queue_family_index()" << std::endl; #endif const uint32_t qfsize = queue_family_props.size(); @@ -578,7 +579,7 @@ static uint32_t ggml_vk_find_queue_family_index(std::vector= vk_gc.tl_semaphores.size()) { @@ -642,7 +643,7 @@ static vk::Event ggml_vk_create_event() { } static void ggml_vk_queue_cleanup(vk_queue& q) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_queue_cleanup()" << std::endl; #endif // Requires command buffers to be done @@ -652,7 +653,7 @@ static void ggml_vk_queue_cleanup(vk_queue& q) { } static vk_buffer ggml_vk_create_buffer(size_t size, vk::MemoryPropertyFlags req_flags) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_create_buffer(" << size << ", " << to_string(req_flags) << ")" << std::endl; #endif GGML_ASSERT(size > 0); @@ -743,7 +744,7 @@ static void ggml_vk_destroy_buffer(vk_buffer& buf) { if (buf.size == 0) { return; } -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_destroy_buffer(" << buf.size << ")" << std::endl; #endif @@ -757,7 +758,7 @@ static vk_subbuffer ggml_vk_subbuffer(vk_buffer& buf) { } static void ggml_vk_sync_buffers(vk_context * ctx) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_sync_buffers()" << std::endl; #endif const std::vector mem_barriers{ { { vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite }, { 
vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite } } }; @@ -773,7 +774,7 @@ static void ggml_vk_sync_buffers(vk_context * ctx) { } static void ggml_vk_wait_events(vk::CommandBuffer& cmd_buffer, std::vector&& events, vk::PipelineStageFlags src_stages, vk::PipelineStageFlags dst_stages) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_wait_events()" << std::endl; #endif if (events.empty()) { @@ -810,7 +811,7 @@ static bool ggml_vk_build_shader(ggml_type type) { } static void ggml_vk_load_shaders() { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_load_shaders()" << std::endl; #endif @@ -849,36 +850,6 @@ static void ggml_vk_load_shaders() { vk_pipeline_matmul_f16_f32_aligned_l = ggml_vk_create_pipeline("matmul_f16_f32_aligned_l", matmul_f16_f32_aligned_l_len, matmul_f16_f32_aligned_l_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, l_align); vk_pipeline_matmul_f16_f32_aligned_m = ggml_vk_create_pipeline("matmul_f16_f32_aligned_m", matmul_f16_f32_aligned_m_len, matmul_f16_f32_aligned_m_data, "main", 3, 14 * sizeof(uint32_t), m_wg_denoms, warptile_m, m_align); vk_pipeline_matmul_f16_f32_aligned_s = ggml_vk_create_pipeline("matmul_f16_f32_aligned_s", matmul_f16_f32_aligned_s_len, matmul_f16_f32_aligned_s_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, s_align); - - // Build dequant shaders - vk_pipeline_dequant[GGML_TYPE_F32] = ggml_vk_create_pipeline("f32_to_f16", f32_to_f16_len, f32_to_f16_data, "main", 2, 4 * sizeof(int), {64, 1, 1}, {}, 1); - - vk_pipeline_dequant[GGML_TYPE_F16] = ggml_vk_create_pipeline("dequant_f16", dequant_f16_len, dequant_f16_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q4_0] = ggml_vk_create_pipeline("dequant_q4_0", dequant_q4_0_len, dequant_q4_0_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q4_1] = ggml_vk_create_pipeline("dequant_q4_1", dequant_q4_1_len, 
dequant_q4_1_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q5_0] = ggml_vk_create_pipeline("dequant_q5_0", dequant_q5_0_len, dequant_q5_0_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q5_1] = ggml_vk_create_pipeline("dequant_q5_1", dequant_q5_1_len, dequant_q5_1_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q8_0] = ggml_vk_create_pipeline("dequant_q8_0", dequant_q8_0_len, dequant_q8_0_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q2_K] = ggml_vk_create_pipeline("dequant_q2_K", dequant_q2_K_len, dequant_q2_K_data, "main", 2, 4 * sizeof(int), {256 * 64, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q3_K] = ggml_vk_create_pipeline("dequant_q3_K", dequant_q3_K_len, dequant_q3_K_data, "main", 2, 4 * sizeof(int), {256 * 64, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q4_K] = ggml_vk_create_pipeline("dequant_q4_K", dequant_q4_K_len, dequant_q4_K_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q5_K] = ggml_vk_create_pipeline("dequant_q5_K", dequant_q5_K_len, dequant_q5_K_data, "main", 2, 4 * sizeof(int), {256 * 64, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q6_K] = ggml_vk_create_pipeline("dequant_q6_K", dequant_q6_K_len, dequant_q6_K_data, "main", 2, 4 * sizeof(int), {256 * 64, 1, 1}, {}, 1); - - // get_rows - vk_pipeline_get_rows[GGML_TYPE_F16] = ggml_vk_create_pipeline("get_rows_f16", get_rows_f16_len, get_rows_f16_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows[GGML_TYPE_Q4_0] = ggml_vk_create_pipeline("get_rows_q4_0", get_rows_q4_0_len, get_rows_q4_0_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows[GGML_TYPE_Q4_1] = ggml_vk_create_pipeline("get_rows_q4_1", get_rows_q4_1_len, get_rows_q4_1_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 
1); - vk_pipeline_get_rows[GGML_TYPE_Q5_0] = ggml_vk_create_pipeline("get_rows_q5_0", get_rows_q5_0_len, get_rows_q5_0_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows[GGML_TYPE_Q5_1] = ggml_vk_create_pipeline("get_rows_q5_1", get_rows_q5_1_len, get_rows_q5_1_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows[GGML_TYPE_Q8_0] = ggml_vk_create_pipeline("get_rows_q8_0", get_rows_q8_0_len, get_rows_q8_0_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - - vk_pipeline_get_rows_f32[GGML_TYPE_F16] = ggml_vk_create_pipeline("get_rows_f16_f32", get_rows_f16_f32_len, get_rows_f16_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows_f32[GGML_TYPE_Q4_0] = ggml_vk_create_pipeline("get_rows_q4_0_f32", get_rows_q4_0_f32_len, get_rows_q4_0_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows_f32[GGML_TYPE_Q4_1] = ggml_vk_create_pipeline("get_rows_q4_1_f32", get_rows_q4_1_f32_len, get_rows_q4_1_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows_f32[GGML_TYPE_Q5_0] = ggml_vk_create_pipeline("get_rows_q5_0_f32", get_rows_q5_0_f32_len, get_rows_q5_0_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows_f32[GGML_TYPE_Q5_1] = ggml_vk_create_pipeline("get_rows_q5_1_f32", get_rows_q5_1_f32_len, get_rows_q5_1_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows_f32[GGML_TYPE_Q8_0] = ggml_vk_create_pipeline("get_rows_q8_0_f32", get_rows_q8_0_f32_len, get_rows_q8_0_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); } else { vk_pipeline_matmul_f32_l = ggml_vk_create_pipeline("matmul_f32_l", matmul_f32_l_fp32_len, matmul_f32_l_fp32_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, 1); vk_pipeline_matmul_f32_m = ggml_vk_create_pipeline("matmul_f32_m", 
matmul_f32_m_fp32_len, matmul_f32_m_fp32_data, "main", 3, 14 * sizeof(uint32_t), m_wg_denoms, warptile_m, 1); @@ -901,36 +872,6 @@ static void ggml_vk_load_shaders() { vk_pipeline_matmul_f16_f32_aligned_l = ggml_vk_create_pipeline("matmul_f16_f32_aligned_l", matmul_f16_f32_aligned_l_fp32_len, matmul_f16_f32_aligned_l_fp32_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, l_align); vk_pipeline_matmul_f16_f32_aligned_m = ggml_vk_create_pipeline("matmul_f16_f32_aligned_m", matmul_f16_f32_aligned_m_fp32_len, matmul_f16_f32_aligned_m_fp32_data, "main", 3, 14 * sizeof(uint32_t), m_wg_denoms, warptile_m, m_align); vk_pipeline_matmul_f16_f32_aligned_s = ggml_vk_create_pipeline("matmul_f16_f32_aligned_s", matmul_f16_f32_aligned_s_fp32_len, matmul_f16_f32_aligned_s_fp32_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, s_align); - - // Build dequant shaders - vk_pipeline_dequant[GGML_TYPE_F32] = ggml_vk_create_pipeline("f32_to_f16", f32_to_f16_fp32_len, f32_to_f16_fp32_data, "main", 2, 4 * sizeof(int), {64, 1, 1}, {}, 1); - - vk_pipeline_dequant[GGML_TYPE_F16] = ggml_vk_create_pipeline("dequant_f16", dequant_f16_fp32_len, dequant_f16_fp32_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q4_0] = ggml_vk_create_pipeline("dequant_q4_0", dequant_q4_0_fp32_len, dequant_q4_0_fp32_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q4_1] = ggml_vk_create_pipeline("dequant_q4_1", dequant_q4_1_fp32_len, dequant_q4_1_fp32_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q5_0] = ggml_vk_create_pipeline("dequant_q5_0", dequant_q5_0_fp32_len, dequant_q5_0_fp32_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q5_1] = ggml_vk_create_pipeline("dequant_q5_1", dequant_q5_1_fp32_len, dequant_q5_1_fp32_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q8_0] 
= ggml_vk_create_pipeline("dequant_q8_0", dequant_q8_0_fp32_len, dequant_q8_0_fp32_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q2_K] = ggml_vk_create_pipeline("dequant_q2_K", dequant_q2_K_fp32_len, dequant_q2_K_fp32_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q3_K] = ggml_vk_create_pipeline("dequant_q3_K", dequant_q3_K_fp32_len, dequant_q3_K_fp32_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q4_K] = ggml_vk_create_pipeline("dequant_q4_K", dequant_q4_K_fp32_len, dequant_q4_K_fp32_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q5_K] = ggml_vk_create_pipeline("dequant_q5_K", dequant_q5_K_fp32_len, dequant_q5_K_fp32_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q6_K] = ggml_vk_create_pipeline("dequant_q6_K", dequant_q6_K_fp32_len, dequant_q6_K_fp32_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); - - // get_rows - vk_pipeline_get_rows[GGML_TYPE_F16] = ggml_vk_create_pipeline("get_rows_f16", get_rows_f16_fp32_len, get_rows_f16_fp32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows[GGML_TYPE_Q4_0] = ggml_vk_create_pipeline("get_rows_q4_0", get_rows_q4_0_fp32_len, get_rows_q4_0_fp32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows[GGML_TYPE_Q4_1] = ggml_vk_create_pipeline("get_rows_q4_1", get_rows_q4_1_fp32_len, get_rows_q4_1_fp32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows[GGML_TYPE_Q5_0] = ggml_vk_create_pipeline("get_rows_q5_0", get_rows_q5_0_fp32_len, get_rows_q5_0_fp32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows[GGML_TYPE_Q5_1] = ggml_vk_create_pipeline("get_rows_q5_1", get_rows_q5_1_fp32_len, get_rows_q5_1_fp32_data, "main", 3, 
sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows[GGML_TYPE_Q8_0] = ggml_vk_create_pipeline("get_rows_q8_0", get_rows_q8_0_fp32_len, get_rows_q8_0_fp32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - - vk_pipeline_get_rows_f32[GGML_TYPE_F16] = ggml_vk_create_pipeline("get_rows_f16_f32", get_rows_f16_f32_fp32_len, get_rows_f16_f32_fp32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows_f32[GGML_TYPE_Q4_0] = ggml_vk_create_pipeline("get_rows_q4_0_f32", get_rows_q4_0_f32_fp32_len, get_rows_q4_0_f32_fp32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows_f32[GGML_TYPE_Q4_1] = ggml_vk_create_pipeline("get_rows_q4_1_f32", get_rows_q4_1_f32_fp32_len, get_rows_q4_1_f32_fp32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows_f32[GGML_TYPE_Q5_0] = ggml_vk_create_pipeline("get_rows_q5_0_f32", get_rows_q5_0_f32_fp32_len, get_rows_q5_0_f32_fp32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows_f32[GGML_TYPE_Q5_1] = ggml_vk_create_pipeline("get_rows_q5_1_f32", get_rows_q5_1_f32_fp32_len, get_rows_q5_1_f32_fp32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows_f32[GGML_TYPE_Q8_0] = ggml_vk_create_pipeline("get_rows_q8_0_f32", get_rows_q8_0_f32_fp32_len, get_rows_q8_0_f32_fp32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); } vk_pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_F16] = ggml_vk_create_pipeline("mul_mat_vec_f16_f32", mul_mat_vec_f16_f32_len, mul_mat_vec_f16_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1); @@ -945,6 +886,36 @@ static void ggml_vk_load_shaders() { vk_pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_Q5_K] = ggml_vk_create_pipeline("mul_mat_vec_q5_K_f32", mul_mat_vec_q5_K_f32_len, mul_mat_vec_q5_K_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1); 
vk_pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_Q6_K] = ggml_vk_create_pipeline("mul_mat_vec_q6_K_f32", mul_mat_vec_q6_K_f32_len, mul_mat_vec_q6_K_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1); + // dequant shaders + vk_pipeline_dequant[GGML_TYPE_F32] = ggml_vk_create_pipeline("f32_to_f16", f32_to_f16_len, f32_to_f16_data, "main", 2, 4 * sizeof(int), {64, 1, 1}, {}, 1); + + vk_pipeline_dequant[GGML_TYPE_F16] = ggml_vk_create_pipeline("dequant_f16", dequant_f16_len, dequant_f16_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); + vk_pipeline_dequant[GGML_TYPE_Q4_0] = ggml_vk_create_pipeline("dequant_q4_0", dequant_q4_0_len, dequant_q4_0_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); + vk_pipeline_dequant[GGML_TYPE_Q4_1] = ggml_vk_create_pipeline("dequant_q4_1", dequant_q4_1_len, dequant_q4_1_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); + vk_pipeline_dequant[GGML_TYPE_Q5_0] = ggml_vk_create_pipeline("dequant_q5_0", dequant_q5_0_len, dequant_q5_0_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); + vk_pipeline_dequant[GGML_TYPE_Q5_1] = ggml_vk_create_pipeline("dequant_q5_1", dequant_q5_1_len, dequant_q5_1_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); + vk_pipeline_dequant[GGML_TYPE_Q8_0] = ggml_vk_create_pipeline("dequant_q8_0", dequant_q8_0_len, dequant_q8_0_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); + vk_pipeline_dequant[GGML_TYPE_Q2_K] = ggml_vk_create_pipeline("dequant_q2_K", dequant_q2_K_len, dequant_q2_K_data, "main", 2, 4 * sizeof(int), {256 * 64, 1, 1}, {}, 1); + vk_pipeline_dequant[GGML_TYPE_Q3_K] = ggml_vk_create_pipeline("dequant_q3_K", dequant_q3_K_len, dequant_q3_K_data, "main", 2, 4 * sizeof(int), {256 * 64, 1, 1}, {}, 1); + vk_pipeline_dequant[GGML_TYPE_Q4_K] = ggml_vk_create_pipeline("dequant_q4_K", dequant_q4_K_len, dequant_q4_K_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); + vk_pipeline_dequant[GGML_TYPE_Q5_K] = ggml_vk_create_pipeline("dequant_q5_K", 
dequant_q5_K_len, dequant_q5_K_data, "main", 2, 4 * sizeof(int), {256 * 64, 1, 1}, {}, 1); + vk_pipeline_dequant[GGML_TYPE_Q6_K] = ggml_vk_create_pipeline("dequant_q6_K", dequant_q6_K_len, dequant_q6_K_data, "main", 2, 4 * sizeof(int), {256 * 64, 1, 1}, {}, 1); + + // get_rows + vk_pipeline_get_rows[GGML_TYPE_F16] = ggml_vk_create_pipeline("get_rows_f16", get_rows_f16_len, get_rows_f16_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + vk_pipeline_get_rows[GGML_TYPE_Q4_0] = ggml_vk_create_pipeline("get_rows_q4_0", get_rows_q4_0_len, get_rows_q4_0_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + vk_pipeline_get_rows[GGML_TYPE_Q4_1] = ggml_vk_create_pipeline("get_rows_q4_1", get_rows_q4_1_len, get_rows_q4_1_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + vk_pipeline_get_rows[GGML_TYPE_Q5_0] = ggml_vk_create_pipeline("get_rows_q5_0", get_rows_q5_0_len, get_rows_q5_0_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + vk_pipeline_get_rows[GGML_TYPE_Q5_1] = ggml_vk_create_pipeline("get_rows_q5_1", get_rows_q5_1_len, get_rows_q5_1_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + vk_pipeline_get_rows[GGML_TYPE_Q8_0] = ggml_vk_create_pipeline("get_rows_q8_0", get_rows_q8_0_len, get_rows_q8_0_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + + vk_pipeline_get_rows_f32[GGML_TYPE_F16] = ggml_vk_create_pipeline("get_rows_f16_f32", get_rows_f16_f32_len, get_rows_f16_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + vk_pipeline_get_rows_f32[GGML_TYPE_Q4_0] = ggml_vk_create_pipeline("get_rows_q4_0_f32", get_rows_q4_0_f32_len, get_rows_q4_0_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + vk_pipeline_get_rows_f32[GGML_TYPE_Q4_1] = ggml_vk_create_pipeline("get_rows_q4_1_f32", get_rows_q4_1_f32_len, get_rows_q4_1_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + 
vk_pipeline_get_rows_f32[GGML_TYPE_Q5_0] = ggml_vk_create_pipeline("get_rows_q5_0_f32", get_rows_q5_0_f32_len, get_rows_q5_0_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + vk_pipeline_get_rows_f32[GGML_TYPE_Q5_1] = ggml_vk_create_pipeline("get_rows_q5_1_f32", get_rows_q5_1_f32_len, get_rows_q5_1_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + vk_pipeline_get_rows_f32[GGML_TYPE_Q8_0] = ggml_vk_create_pipeline("get_rows_q8_0_f32", get_rows_q8_0_f32_len, get_rows_q8_0_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + vk_pipeline_matmul_split_k_reduce = ggml_vk_create_pipeline("split_k_reduce", split_k_reduce_len, split_k_reduce_data, "main", 2, 2 * sizeof(uint32_t), {256, 1, 1}, {}, 1); vk_pipeline_mul_mat_vec_p021_f16_f32 = ggml_vk_create_pipeline("mul_mat_vec_p021_f16_f32", mul_mat_vec_p021_f16_f32_len, mul_mat_vec_p021_f16_f32_data, "main", 3, 6 * sizeof(uint32_t), {1, 1, 1}, {}, 1); @@ -983,7 +954,7 @@ static void ggml_vk_load_shaders() { } void ggml_vk_init() { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_init()" << std::endl; #endif static bool initialized = false; @@ -999,17 +970,17 @@ void ggml_vk_init() { vk::ApplicationInfo app_info{ "ggml-vulkan", 1, nullptr, 0, VK_API_VERSION }; const std::vector layers = { -#ifdef VK_VALIDATE +#ifdef GGML_VULKAN_VALIDATE "VK_LAYER_KHRONOS_validation", #endif }; const std::vector extensions = { -#ifdef VK_VALIDATE +#ifdef GGML_VULKAN_VALIDATE "VK_EXT_validation_features", #endif }; vk::InstanceCreateInfo instance_create_info(vk::InstanceCreateFlags(), &app_info, layers, extensions); -#ifdef VK_VALIDATE +#ifdef GGML_VULKAN_VALIDATE const std::vector features_enable = { vk::ValidationFeatureEnableEXT::eBestPractices }; vk::ValidationFeaturesEXT validation_features = { features_enable, @@ -1120,7 +1091,7 @@ std::cerr << "ggml_vulkan: Validation layers enabled" << std::endl; device_extensions.push_back("VK_KHR_16bit_storage"); 
-#ifdef VK_VALIDATE +#ifdef GGML_VULKAN_VALIDATE device_extensions.push_back("VK_KHR_shader_non_semantic_info"); #endif @@ -1154,6 +1125,7 @@ std::cerr << "ggml_vulkan: Validation layers enabled" << std::endl; vk_fence = vk_device.device.createFence({}); vk_ctx = nullptr; + vk_transfer_ctx = nullptr; vk_disable = false; @@ -1166,7 +1138,7 @@ std::cerr << "ggml_vulkan: Validation layers enabled" << std::endl; } static vk_pipeline* ggml_vk_get_to_fp16(ggml_type type) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_get_to_fp16()" << std::endl; #endif switch (type) { @@ -1190,7 +1162,7 @@ static vk_pipeline* ggml_vk_get_to_fp16(ggml_type type) { } static vk_pipeline* ggml_vk_get_dequantize_mul_mat_vec(ggml_type type) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_get_dequantize_mul_mat_vec()" << std::endl; #endif switch (type) { @@ -1219,7 +1191,7 @@ static vk_pipeline* ggml_vk_get_dequantize_mul_mat_vec(ggml_type type) { static vk_buffer g_vk_buffer_pool[MAX_VK_BUFFERS]; static vk_buffer ggml_vk_pool_malloc(size_t size) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_pool_malloc(" << size << ")" << std::endl; #endif int best_i = -1; @@ -1253,7 +1225,7 @@ static vk_buffer ggml_vk_pool_malloc(size_t size) { } static void ggml_vk_pool_free(vk_buffer& buffer) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_pool_free(" << buffer.size << ")" << std::endl; #endif for (int i = 0; i < MAX_VK_BUFFERS; ++i) { @@ -1286,7 +1258,7 @@ static vk_buffer ggml_vk_create_buffer_temp(size_t size) { } static void * ggml_vk_host_malloc(size_t size) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_host_malloc(" << size << ")" << std::endl; #endif vk_buffer buf = ggml_vk_create_buffer(size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached); @@ -1309,7 +1281,7 @@ static void ggml_vk_host_free(void* ptr) { if (ptr == 
nullptr) { return; } -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_host_free(" << ptr << ")" << std::endl; #endif vk_buffer* buf = nullptr; @@ -1363,7 +1335,7 @@ static void ggml_vk_dispatch_pipeline(vk_context * ctx, vk_pipeline& pipeline, s const uint32_t wg0 = CEIL_DIV(elements[0], pipeline.wg_denoms[0]); const uint32_t wg1 = CEIL_DIV(elements[1], pipeline.wg_denoms[1]); const uint32_t wg2 = CEIL_DIV(elements[2], pipeline.wg_denoms[2]); -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_dispatch_pipeline(" << pipeline.name << ", (" << wg0 << "," << wg1 << "," << wg2 << "))" << std::endl; #endif std::vector descriptor_buffer_infos; @@ -1398,7 +1370,7 @@ static void ggml_vk_end_submission(vk_submission& s, std::vector w } static void ggml_vk_ctx_end(vk_context * ctx) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_ctx_end(" << ctx << ", " << ctx->seqs.size() << ")" << std::endl; #endif if (ctx->s == nullptr) { @@ -1410,7 +1382,7 @@ static void ggml_vk_ctx_end(vk_context * ctx) { } static void ggml_vk_ctx_begin(vk_context * ctx) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_ctx_begin(" << ctx << ")" << std::endl; #endif if (ctx->s != nullptr) { @@ -1441,7 +1413,7 @@ static void ensure_sync_staging_buffer(size_t size) { } static void ggml_vk_buffer_write_nc_async(vk_context * ctx, vk_buffer* dst, size_t offset, const ggml_tensor * tensor, bool sync_staging = false) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_buffer_write_nc_async(" << tensor << ")" << std::endl; #endif GGML_ASSERT(!ggml_is_contiguous(tensor)); @@ -1548,7 +1520,7 @@ static void ggml_vk_buffer_write_nc_async(vk_context * ctx, vk_buffer* dst, size } static void ggml_vk_buffer_write_2d_async(vk_context * ctx, vk_buffer* dst, size_t offset, const void * src, size_t spitch, size_t width, size_t height, bool sync_staging = false) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << 
"ggml_vk_buffer_write_2d_async(" << width << ", " << height << ")" << std::endl; #endif // Buffer is already mapped @@ -1582,7 +1554,7 @@ static void ggml_vk_buffer_write_2d_async(vk_context * ctx, vk_buffer* dst, size ctx->s->buffer.copyBuffer(buf->buffer, dst->buffer, slices); return; } -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "STAGING" << std::endl; #endif @@ -1619,14 +1591,14 @@ static void ggml_vk_buffer_write_2d_async(vk_context * ctx, vk_buffer* dst, size } static void ggml_vk_buffer_write_async(vk_context * ctx, vk_buffer* dst, size_t offset, const void * src, size_t size, bool sync_staging = false) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_buffer_write_async(" << size << ")" << std::endl; #endif return ggml_vk_buffer_write_2d_async(ctx, dst, offset, src, size, size, 1, sync_staging); } static void ggml_vk_buffer_write_2d(vk_buffer* dst, size_t offset, const void * src, size_t spitch, size_t width, size_t height) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_buffer_write_2d(" << width << ", " << height << ")" << std::endl; #endif // Buffer is already mapped @@ -1653,14 +1625,14 @@ static void ggml_vk_buffer_write_2d(vk_buffer* dst, size_t offset, const void * } static void ggml_vk_buffer_write(vk_buffer* dst, size_t offset, const void * src, size_t size) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_buffer_write(" << size << ")" << std::endl; #endif ggml_vk_buffer_write_2d(dst, offset, src, 0, size, 1); } static void ggml_vk_buffer_read_2d_async(vk_context * ctx, vk_buffer* src, size_t offset, void * dst, size_t spitch, size_t dpitch, size_t width, size_t height, bool sync_staging = false) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_buffer_read_2d_async(offset=" << offset << ", width=" << width << ", height=" << height << ")" << std::endl; #endif GGML_ASSERT(width > 0); @@ -1693,7 +1665,7 @@ static void ggml_vk_buffer_read_2d_async(vk_context * ctx, 
vk_buffer* src, size_ return; } -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "STAGING" << std::endl; #endif @@ -1722,7 +1694,7 @@ static void ggml_vk_buffer_read_async(vk_context * ctx, vk_buffer* src, size_t o } static void ggml_vk_buffer_read(vk_buffer* src, size_t offset, void * dst, size_t size) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_buffer_read(" << offset << ", " << size << ")" << std::endl; #endif if(src->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible) { @@ -1746,7 +1718,7 @@ static void ggml_vk_buffer_read(vk_buffer* src, size_t offset, void * dst, size_ } static void ggml_vk_buffer_copy_async(vk_context * ctx, vk_buffer * dst, size_t dst_offset, vk_buffer * src, size_t src_offset, size_t size) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_buffer_copy_async(" << size << ")" << std::endl; #endif VkBufferCopy bc{ src_offset, dst_offset, size }; @@ -1755,7 +1727,7 @@ static void ggml_vk_buffer_copy_async(vk_context * ctx, vk_buffer * dst, size_t } static void ggml_vk_buffer_copy(vk_buffer * dst, size_t dst_offset, vk_buffer * src, size_t src_offset, size_t size) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_buffer_copy(" << size << ")" << std::endl; #endif VkBufferCopy bc{ src_offset, dst_offset, size }; @@ -1771,7 +1743,7 @@ static void ggml_vk_buffer_copy(vk_buffer * dst, size_t dst_offset, vk_buffer * } static void ggml_vk_buffer_memset(vk_buffer* dst, size_t offset, uint32_t c, size_t size) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_buffer_memset(" << offset << ", " << c << ", " << size << ")" << std::endl; #endif vk_context * ctx = ggml_vk_create_context(vk_device.transfer_queue); @@ -1785,7 +1757,7 @@ static void ggml_vk_buffer_memset(vk_buffer* dst, size_t offset, uint32_t c, siz } static void ggml_vk_h2d_tensor_2d(vk_context * ctx, vk_buffer * dst, size_t offset, const ggml_tensor * src, uint64_t i3, uint64_t i2, uint64_t i1) { 
-#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_h2d_tensor_2d(dst=" << dst << ", offset=" << offset << ", src=" << src << ", i3=" << i3 << ", i2=" << i2 << ", i1=" << i1 << ")" << std::endl; #endif const uint64_t ne0 = src->ne[0]; @@ -1815,7 +1787,7 @@ static void ggml_vk_h2d_tensor_2d(vk_context * ctx, vk_buffer * dst, size_t offs } static void ggml_vk_d2h_tensor_2d(vk_context * ctx, vk_buffer * src, size_t offset, const ggml_tensor * dst) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_d2h_tensor_2d()" << std::endl; #endif const uint64_t ne0 = dst->ne[0]; @@ -1841,24 +1813,24 @@ static void ggml_vk_d2h_tensor_2d(vk_context * ctx, vk_buffer * src, size_t offs } static uint32_t ggml_vk_guess_split_k(int m, int n, int k) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_guess_split_k(" << m << ", " << n << ", " << k << ")"; #endif if (k > 128 && (m < 128 || n < 128) && m > 2 && n > 2) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << " = 4" << std::endl; #endif return 4; } -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << " = 1" << std::endl; #endif return 1; } static uint32_t ggml_vk_guess_matmul_pipeline_align(int m, int n) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_guess_matmul_pipeline_align(" << m << ", " << n << ")" << std::endl; #endif if (m <= 32 || n <= 32) { @@ -1871,41 +1843,41 @@ static uint32_t ggml_vk_guess_matmul_pipeline_align(int m, int n) { } static vk_pipeline* ggml_vk_guess_matmul_pipeline(bool bit16_x, bool bit16_y, int m, int n, bool aligned) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_guess_matmul_pipeline(" << bit16_x << ", " << bit16_y << ", " << m << ", " << n << ", " << aligned << ")"; #endif if (bit16_x && bit16_y) { - if (m <= 32 || n <= 32) { -#ifdef VK_DEBUG + if (vk_device.vendor_id == VK_VENDOR_ID_INTEL || m <= 32 || n <= 32) { +#ifdef GGML_VULKAN_DEBUG std::cerr << " S" << std::endl; #endif return aligned ? 
&vk_pipeline_matmul_f16_aligned_s : &vk_pipeline_matmul_f16_s; } if (vk_device.subgroup_size == 64 || m <= 64 || n <= 64) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << " M" << std::endl; #endif return aligned ? &vk_pipeline_matmul_f16_aligned_m : &vk_pipeline_matmul_f16_m; } -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << " L" << std::endl; #endif return aligned ? &vk_pipeline_matmul_f16_aligned_l : &vk_pipeline_matmul_f16_l; } if (bit16_x && !bit16_y) { - if (m <= 32 || n <= 32) { -#ifdef VK_DEBUG + if (vk_device.vendor_id == VK_VENDOR_ID_INTEL || m <= 32 || n <= 32) { +#ifdef GGML_VULKAN_DEBUG std::cerr << " S" << std::endl; #endif return aligned ? &vk_pipeline_matmul_f16_f32_aligned_s : &vk_pipeline_matmul_f16_f32_s; } if (vk_device.subgroup_size == 64 || m <= 64 || n <= 64) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << " M" << std::endl; #endif return aligned ? &vk_pipeline_matmul_f16_f32_aligned_m : &vk_pipeline_matmul_f16_f32_m; } -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << " L" << std::endl; #endif return aligned ? &vk_pipeline_matmul_f16_f32_aligned_l : &vk_pipeline_matmul_f16_f32_l; @@ -1914,30 +1886,30 @@ static vk_pipeline* ggml_vk_guess_matmul_pipeline(bool bit16_x, bool bit16_y, in GGML_ASSERT(false); } - if (m <= 32 || n <= 32) { -#ifdef VK_DEBUG + if (vk_device.vendor_id == VK_VENDOR_ID_INTEL || m <= 32 || n <= 32) { +#ifdef GGML_VULKAN_DEBUG std::cerr << " S" << std::endl; #endif return aligned ? &vk_pipeline_matmul_f32_aligned_s : &vk_pipeline_matmul_f32_s; } if (vk_device.subgroup_size == 64 || m <= 64 || n <= 64) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << " M" << std::endl; #endif return aligned ? &vk_pipeline_matmul_f32_aligned_m : &vk_pipeline_matmul_f32_m; } -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << " L" << std::endl; #endif return aligned ? 
&vk_pipeline_matmul_f32_aligned_l : &vk_pipeline_matmul_f32_l; } static void ggml_vk_matmul(vk_context * ctx, vk_pipeline& pipeline, vk_subbuffer&& a, vk_subbuffer&& b, vk_subbuffer&& d, vk_subbuffer&& split_k_buffer, uint32_t m, uint32_t n, uint32_t k, uint32_t stride_a, uint32_t stride_b, uint32_t stride_d, uint32_t split_k, uint32_t batch, uint32_t ne02, uint32_t ne12, uint32_t broadcast2, uint32_t broadcast3, uint32_t batch_stride_a, uint32_t batch_stride_b, uint32_t batch_stride_d) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_matmul(a: (" << a.buffer.buffer << ", " << a.offset << ", " << a.size << "), b: (" << b.buffer.buffer << ", " << b.offset << ", " << b.size << "), c: (" << d.buffer.buffer << ", " << d.offset << ", " << d.size << "), split_k: (" << split_k_buffer.buffer.buffer << ", " << split_k_buffer.offset << ", " << split_k_buffer.size << "), m: " << m << ", n: " << n << ", k: " << k << ", stride_a: " << stride_a << ", stride_b: " << stride_b << ", stride_d: " << stride_d << ", split_k: " << split_k << ", batch: " << batch << ", ne02: " << ne02 << ", ne12: " << ne12 << ", broadcast2: " << broadcast2 << ", broadcast3: " << broadcast3 << ", batch_stride_a: " << batch_stride_a << ", batch_stride_b: " << batch_stride_b << ", batch_stride_d: " << batch_stride_d << ")" << std::endl; #endif + ggml_vk_sync_buffers(ctx); if (split_k == 1) { - ggml_vk_sync_buffers(ctx); const std::array pc = { m, n, k, stride_a, stride_b, stride_d, k, ne02, ne12, broadcast2, broadcast3, batch_stride_a, batch_stride_b, batch_stride_d }; ggml_vk_dispatch_pipeline(ctx, pipeline, { a, b, d }, pc.size() * sizeof(uint32_t), pc.data(), { m, n, batch }); return; @@ -1945,10 +1917,6 @@ static void ggml_vk_matmul(vk_context * ctx, vk_pipeline& pipeline, vk_subbuffer GGML_ASSERT(batch_stride_d == m * n); - // Synchronize the two submissions - ggml_vk_sync_buffers(ctx); - ctx->s->buffer.fillBuffer(split_k_buffer.buffer.buffer, 0, split_k_buffer.size, 0); - 
ggml_vk_sync_buffers(ctx); const std::array pc1 = { m, n, k, stride_a, stride_b, stride_d, CEIL_DIV(k, split_k), ne02, ne12, broadcast2, broadcast3, batch_stride_a, batch_stride_b, batch_stride_d }; // Make sure enough workgroups get assigned for split k to work ggml_vk_dispatch_pipeline(ctx, pipeline, { a, b, split_k_buffer }, pc1.size() * sizeof(uint32_t), pc1.data(), { (CEIL_DIV(m, pipeline.wg_denoms[0]) * pipeline.wg_denoms[0]) * split_k, n, batch }); @@ -1980,7 +1948,7 @@ static vk_pipeline * ggml_vk_get_cpy_pipeline(ggml_type from, ggml_type to) { } static void ggml_vk_cpy_to_contiguous(vk_context * ctx, vk_pipeline * pipeline, const ggml_tensor * tensor, vk_subbuffer&& in, vk_subbuffer&& out, ggml_type buffer_type, bool aligned=true) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_cpy_to_contiguous((" << tensor << ", type=" << tensor->type << ", backend=" << tensor->backend << ", ne0=" << tensor->ne[0] << ", ne1=" << tensor->ne[1] << ", ne2=" << tensor->ne[2] << ", ne3=" << tensor->ne[3] << ", nb0=" << tensor->nb[0] << ", nb1=" << tensor->nb[1] << ", nb2=" << tensor->nb[2] << ", nb3=" << tensor->nb[3] << "), "; std::cerr << "buffer in size=" << in.buffer.size << ", buffer out size=" << out.buffer.size << ")" << std::endl; #endif @@ -2002,7 +1970,7 @@ static void ggml_vk_cpy_to_contiguous(vk_context * ctx, vk_pipeline * pipeline, } static void ggml_vk_mul_mat_q_f16(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_mul_mat_q_f16((" << src0 << ", name=" << src0->name << ", type=" << src0->type << ", backend=" << src0->backend << ", ne0=" << src0->ne[0] << ", ne1=" << src0->ne[1] << ", ne2=" << src0->ne[2] << ", ne3=" << src0->ne[3] << ", nb0=" << src0->nb[0] << ", nb1=" << src0->nb[1] << ", nb2=" << src0->nb[2] << ", nb3=" << src0->nb[3]; std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", backend=" << 
src1->backend << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3]; std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", backend=" << dst->backend << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3] << "),)" << std::endl; @@ -2186,7 +2154,7 @@ static void ggml_vk_mul_mat_q_f16(vk_context * ctx, const ggml_tensor * src0, co } static void ggml_vk_mul_mat_vec_q_f16(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_mul_mat_vec_q_f16((" << src0 << ", name=" << src0->name << ", type=" << src0->type << ", backend=" << src0->backend << ", ne0=" << src0->ne[0] << ", ne1=" << src0->ne[1] << ", ne2=" << src0->ne[2] << ", ne3=" << src0->ne[3] << ", nb0=" << src0->nb[0] << ", nb1=" << src0->nb[1] << ", nb2=" << src0->nb[2] << ", nb3=" << src0->nb[3]; std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", backend=" << src1->backend << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3]; std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", backend=" << dst->backend << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3] << "),)" << std::endl; @@ -2366,7 +2334,7 @@ static void ggml_vk_mul_mat_vec_q_f16(vk_context * ctx, const ggml_tensor * src0 } static void 
ggml_vk_mul_mat_vec_p021_f16_f32(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_mul_mat_p021_f16_f32((" << src0 << ", name=" << src0->name << ", type=" << src0->type << ", backend=" << src0->backend << ", ne0=" << src0->ne[0] << ", ne1=" << src0->ne[1] << ", ne2=" << src0->ne[2] << ", ne3=" << src0->ne[3] << ", nb0=" << src0->nb[0] << ", nb1=" << src0->nb[1] << ", nb2=" << src0->nb[2] << ", nb3=" << src0->nb[3]; std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", backend=" << src1->backend << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3]; std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", backend=" << dst->backend << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3] << "),)" << std::endl; @@ -2455,7 +2423,7 @@ static void ggml_vk_mul_mat_vec_p021_f16_f32(vk_context * ctx, const ggml_tensor } static void ggml_vk_mul_mat_vec_nc_f16_f32(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_mul_mat_nc_f16_f32((" << src0 << ", name=" << src0->name << ", type=" << src0->type << ", backend=" << src0->backend << ", ne0=" << src0->ne[0] << ", ne1=" << src0->ne[1] << ", ne2=" << src0->ne[2] << ", ne3=" << src0->ne[3] << ", nb0=" << src0->nb[0] << ", nb1=" << src0->nb[1] << ", nb2=" << src0->nb[2] << ", nb3=" << src0->nb[3]; std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", backend=" << src1->backend << ", ne0=" << src1->ne[0] << ", ne1=" << 
src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3]; std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", backend=" << dst->backend << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3] << "),)" << std::endl; @@ -2561,7 +2529,7 @@ static bool ggml_vk_can_mul_mat(const ggml_tensor * src0, const ggml_tensor * sr } static void ggml_vk_mul_mat(vk_context * ctx, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_mul_mat(" << src0 << ", " << src1 << ", " << dst << ")" << std::endl; #endif if (src0->type == GGML_TYPE_F16 && ggml_is_permuted(src0) && ggml_is_permuted(src1) && src1->ne[1] == 1) { @@ -2774,7 +2742,7 @@ static void ggml_vk_check_results_0(ggml_compute_params * params, ggml_tensor * template static void ggml_vk_op_f32(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, ggml_op op, const PC&& pc) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_op_f32((" << src0 << ", name=" << src0->name << ", type=" << src0->type << ", backend=" << src0->backend << ", ne0=" << src0->ne[0] << ", ne1=" << src0->ne[1] << ", ne2=" << src0->ne[2] << ", ne3=" << src0->ne[3] << ", nb0=" << src0->nb[0] << ", nb1=" << src0->nb[1] << ", nb2=" << src0->nb[2] << ", nb3=" << src0->nb[3]; if (src1 != nullptr) { std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", backend=" << src1->backend << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << 
src1->nb[3]; @@ -3095,7 +3063,7 @@ static void ggml_vk_nop(vk_context * ctx, const ggml_tensor * src0, ggml_tensor } } -#ifdef VK_RUN_TESTS +#ifdef GGML_VULKAN_RUN_TESTS static void ggml_vk_print_matrix_area(const void * data, ggml_type type, int ne0, int ne1, int i0, int i1, int i2) { if (type != GGML_TYPE_F32 && type != GGML_TYPE_F16) { return; @@ -3129,7 +3097,7 @@ static void ggml_vk_print_matrix_area(const void * data, ggml_type type, int ne0 template static void ggml_vk_test_matmul(size_t m, size_t n, size_t k, size_t batch, size_t num_it, int split_k, int shader_size) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_test_matmul(" << m << ", " << n << ", " << k << ", " << batch << ", " << num_it << ", " << split_k << ", " << shader_size << ")" << std::endl; #endif const size_t x_ne = m * k * batch; @@ -3520,7 +3488,7 @@ static void ggml_vk_test_h2d_nc(size_t ne0, size_t ne1, size_t ne2, size_t ne3) } static void ggml_vk_test_transfer(size_t ne, bool pinned) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_test_transfer(" << ne << ")" << std::endl; #endif // Check transfers are correct @@ -3600,10 +3568,103 @@ static void ggml_vk_test_transfer(size_t ne, bool pinned) { free(y); } } + +static void ggml_vk_test_dequant(size_t ne, ggml_type quant) { +#ifdef GGML_VULKAN_DEBUG + std::cerr << "ggml_vk_test_dequant(" << ne << ")" << std::endl; +#endif + const size_t x_sz = sizeof(float) * ne; + const size_t x_sz_f16 = sizeof(ggml_fp16_t) * ne; + const size_t qx_sz = ne * ggml_type_size(quant)/ggml_blck_size(quant); + float * x = (float *) malloc(x_sz); + void * qx = malloc(qx_sz); + vk_buffer qx_buf = ggml_vk_create_buffer_check(qx_sz, vk::MemoryPropertyFlagBits::eDeviceLocal); + vk_buffer x_buf = ggml_vk_create_buffer_check(x_sz_f16, vk::MemoryPropertyFlagBits::eDeviceLocal); + ggml_fp16_t * x_chk = (ggml_fp16_t *) malloc(x_sz_f16); + + for (size_t i = 0; i < ne; i++) { + x[i] = rand() / (float)RAND_MAX; + } + + std::vector 
hist_cur(1 << 4, 0); + + vk_pipeline& p = vk_pipeline_dequant[quant]; + + switch(quant) { + case GGML_TYPE_Q4_0: + ggml_quantize_q4_0(x, qx, ne, ne, hist_cur.data()); + break; + case GGML_TYPE_Q4_1: + ggml_quantize_q4_1(x, qx, ne, ne, hist_cur.data()); + break; + case GGML_TYPE_Q5_0: + ggml_quantize_q5_0(x, qx, ne, ne, hist_cur.data()); + break; + case GGML_TYPE_Q5_1: + ggml_quantize_q5_1(x, qx, ne, ne, hist_cur.data()); + break; + case GGML_TYPE_Q8_0: + ggml_quantize_q8_0(x, qx, ne, ne, hist_cur.data()); + break; + case GGML_TYPE_Q2_K: + ggml_quantize_q2_K(x, qx, ne, ne, hist_cur.data()); + break; + case GGML_TYPE_Q3_K: + ggml_quantize_q3_K(x, qx, ne, ne, hist_cur.data()); + break; + case GGML_TYPE_Q4_K: + ggml_quantize_q4_K(x, qx, ne, ne, hist_cur.data()); + break; + case GGML_TYPE_Q5_K: + ggml_quantize_q5_K(x, qx, ne, ne, hist_cur.data()); + break; + case GGML_TYPE_Q6_K: + ggml_quantize_q6_K(x, qx, ne, ne, hist_cur.data()); + break; + default: + GGML_ASSERT(false); + } + + ggml_vk_pipeline_allocate_descriptor_sets(p, 1); + + ggml_vk_buffer_write(&qx_buf, 0, qx, qx_sz); + + vk_context * ctx = ggml_vk_create_context(vk_device.compute_queue); + ggml_vk_ctx_begin(ctx); + const std::vector pc = { 1, (int)ne, (int)ne, (int)ne }; + ggml_vk_sync_buffers(ctx); + ggml_vk_dispatch_pipeline(ctx, p, { { qx_buf, 0, qx_sz }, { x_buf, 0, x_sz_f16 } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)ne, 1, 1}); + ggml_vk_ctx_end(ctx); + + auto begin = std::chrono::high_resolution_clock::now(); + + ggml_vk_submit(ctx, vk_fence); + VK_CHECK(vk_device.device.waitForFences({ vk_fence }, true, UINT64_MAX), "ggml_vk_compute_forward waitForFences"); + vk_device.device.resetFences({ vk_fence }); + + auto end = std::chrono::high_resolution_clock::now(); + + double ms_dequant = std::chrono::duration_cast(end-begin).count() / 1000.0; + ggml_vk_buffer_read(&x_buf, 0, x_chk, x_sz_f16); + + double avg_err = 0.0; + for (size_t i = 0; i < ne; i++) { + avg_err += std::fabs(x[i] -
ggml_fp16_to_fp32(x_chk[i])); + } + + std::cerr << "TEST DEQUANT " << ggml_type_name(quant) << " time=" << ms_dequant << "ms avg_err=" << avg_err / ne << std::endl; + + ggml_vk_destroy_buffer(x_buf); + ggml_vk_destroy_buffer(qx_buf); + + free(x); + free(qx); + free(x_chk); +} #endif static ggml_tensor_extra_gpu * ggml_vk_tensor_create_extra(ggml_tensor * tensor) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_create_extra(" << tensor << " (" << tensor->name << ", " << ggml_op_name(tensor->op) << "))" << std::endl; #endif ggml_tensor_extra_gpu * extra = new ggml_tensor_extra_gpu; @@ -3627,7 +3688,7 @@ static ggml_tensor * ggml_vk_find_last_use(const ggml_tensor * node, ggml_cgraph } void ggml_vk_preallocate_buffers_graph(ggml_tensor * node){ -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_preallocate_buffers_graph(" << node << ")" << std::endl; #endif const bool any_on_device = node->backend == GGML_BACKEND_GPU @@ -3746,15 +3807,26 @@ void ggml_vk_preallocate_buffers() { if (vk_disable) { return; } -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_preallocate_buffers()" << std::endl; std::cerr << "qx_size: " << vk_prealloc_size_qx << " qy_size: " << vk_prealloc_size_qy << " x_size: " << vk_prealloc_size_x << " y_size: " << vk_prealloc_size_y << " split_k_size: " << vk_prealloc_size_split_k << std::endl; #endif -#if defined(VK_RUN_TESTS) +#if defined(GGML_VULKAN_RUN_TESTS) vk_staging = ggml_vk_create_buffer_check(100ul * 1024ul * 1024ul, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached); ggml_vk_test_transfer(8192 * 1000, false); ggml_vk_test_transfer(8192 * 1000, true); + ggml_vk_test_dequant(2560 * 7680, GGML_TYPE_Q4_0); + ggml_vk_test_dequant(2560 * 7680, GGML_TYPE_Q4_1); + ggml_vk_test_dequant(2560 * 7680, GGML_TYPE_Q5_0); + ggml_vk_test_dequant(2560 * 7680, GGML_TYPE_Q5_1); + ggml_vk_test_dequant(2560 * 7680, GGML_TYPE_Q8_0); + 
ggml_vk_test_dequant(2560 * 7680, GGML_TYPE_Q2_K); + ggml_vk_test_dequant(2560 * 7680, GGML_TYPE_Q3_K); + ggml_vk_test_dequant(2560 * 7680, GGML_TYPE_Q4_K); + ggml_vk_test_dequant(2560 * 7680, GGML_TYPE_Q5_K); + ggml_vk_test_dequant(2560 * 7680, GGML_TYPE_Q6_K); + const std::vector vals { 8, 8, 8, 100, 46, 576, @@ -3845,7 +3917,7 @@ void ggml_vk_build_graph(ggml_tensor * node, bool last_node){ return; } -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_build_graph(" << node << ", " << ggml_op_name(node->op) << ")" << std::endl; #endif vk_semaphore_idx = 0; @@ -4068,7 +4140,7 @@ bool ggml_vk_compute_forward(ggml_compute_params * params, ggml_tensor * tensor) return true; } -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_compute_forward(" << tensor << ", name=" << tensor->name << ", op=" << ggml_op_name(tensor->op) << ", type=" << tensor->type << ", backend=" << tensor->backend << ", ne0=" << tensor->ne[0] << ", ne1=" << tensor->ne[1] << ", ne2=" << tensor->ne[2] << ", ne3=" << tensor->ne[3] << ", nb0=" << tensor->nb[0] << ", nb1=" << tensor->nb[1] << ", nb2=" << tensor->nb[2] << ", nb3=" << tensor->nb[3] << ", view_src=" << tensor->view_src << ", view_offs=" << tensor->view_offs << ")" << std::endl; #endif @@ -4111,7 +4183,7 @@ void ggml_vk_graph_cleanup() { if (vk_disable) { return; } -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_graph_cleanup()" << std::endl; #endif for (auto& buffer : vk_gc.temp_buffers) { @@ -4150,7 +4222,7 @@ void ggml_vk_graph_cleanup() { } static void ggml_vk_cleanup() { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_cleanup()" << std::endl; #endif ggml_vk_destroy_buffer(vk_prealloc_x); @@ -4234,7 +4306,7 @@ GGML_CALL static void * ggml_backend_vk_buffer_get_base(ggml_backend_buffer_t bu } GGML_CALL static void ggml_backend_vk_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << 
"ggml_backend_vk_buffer_init_tensor(" << buffer << " (" << buffer->context << "), " << tensor << ")" << std::endl; #endif ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context; @@ -4254,7 +4326,7 @@ GGML_CALL static void ggml_backend_vk_buffer_init_tensor(ggml_backend_buffer_t b } GGML_CALL static void ggml_backend_vk_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_backend_vk_buffer_set_tensor(" << buffer << ", " << tensor << ", " << data << ", " << offset << ", " << size << ")" << std::endl; #endif GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU); @@ -4267,7 +4339,7 @@ GGML_CALL static void ggml_backend_vk_buffer_set_tensor(ggml_backend_buffer_t bu } GGML_CALL static void ggml_backend_vk_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_backend_vk_buffer_get_tensor(" << buffer << ", " << tensor << ", " << data << ", " << offset << ", " << size << ")" << std::endl; #endif GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU); @@ -4323,7 +4395,7 @@ GGML_CALL static const char * ggml_backend_vk_buffer_type_name(ggml_backend_buff } GGML_CALL static ggml_backend_buffer_t ggml_backend_vk_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_backend_vk_buffer_type_alloc_buffer(" << size << ")" << std::endl; #endif vk_buffer dev_buffer = ggml_vk_create_buffer_device(size); @@ -4467,7 +4539,7 @@ GGML_CALL static ggml_backend_buffer_type_t ggml_backend_vk_get_default_buffer_t } GGML_CALL static void ggml_backend_vk_set_tensor_async(ggml_backend_t backend, ggml_tensor * tensor, const void * data, size_t offset, size_t size) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << 
"ggml_backend_vk_set_tensor_async(" << size << ")" << std::endl; #endif GGML_ASSERT((tensor->buffer->buft == ggml_backend_vk_buffer_type() || tensor->buffer->buft == ggml_backend_vk_host_buffer_type()) && "unsupported buffer type"); @@ -4475,19 +4547,19 @@ GGML_CALL static void ggml_backend_vk_set_tensor_async(ggml_backend_t backend, g ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra; - if (vk_ctx == nullptr) { + if (vk_transfer_ctx == nullptr) { // Initialize new transfer context - vk_ctx = ggml_vk_create_context(vk_device.transfer_queue); - ggml_vk_ctx_begin(vk_ctx); + vk_transfer_ctx = ggml_vk_create_context(vk_device.transfer_queue); + ggml_vk_ctx_begin(vk_transfer_ctx); } - ggml_vk_buffer_write_async(vk_ctx, &extra->buffer_gpu, extra->offset + offset, data, size); + ggml_vk_buffer_write_async(vk_transfer_ctx, &extra->buffer_gpu, extra->offset + offset, data, size); UNUSED(backend); } GGML_CALL static void ggml_backend_vk_get_tensor_async(ggml_backend_t backend, const ggml_tensor * tensor, void * data, size_t offset, size_t size) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_backend_vk_get_tensor_async(" << size << ")" << std::endl; #endif GGML_ASSERT((tensor->buffer->buft == ggml_backend_vk_buffer_type() || tensor->buffer->buft == ggml_backend_vk_host_buffer_type()) && "unsupported buffer type"); @@ -4495,32 +4567,32 @@ GGML_CALL static void ggml_backend_vk_get_tensor_async(ggml_backend_t backend, c ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra; - if (vk_ctx == nullptr) { + if (vk_transfer_ctx == nullptr) { // Initialize new transfer context - vk_ctx = ggml_vk_create_context(vk_device.transfer_queue); - ggml_vk_ctx_begin(vk_ctx); + vk_transfer_ctx = ggml_vk_create_context(vk_device.transfer_queue); + ggml_vk_ctx_begin(vk_transfer_ctx); } - ggml_vk_buffer_read_async(vk_ctx, &extra->buffer_gpu, extra->offset + offset, data, size); + ggml_vk_buffer_read_async(vk_transfer_ctx, &extra->buffer_gpu, 
extra->offset + offset, data, size); UNUSED(backend); } GGML_CALL static bool ggml_backend_vk_cpy_tensor_async(ggml_backend_t backend, const ggml_tensor * src, ggml_tensor * dst) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_backend_vk_cpy_tensor_async()" << std::endl; #endif if ((dst->buffer->buft == ggml_backend_vk_buffer_type() || dst->buffer->buft == ggml_backend_vk_host_buffer_type()) && ggml_backend_buffer_is_vk(src->buffer)) { ggml_tensor_extra_gpu * src_extra = (ggml_tensor_extra_gpu *) src->extra; ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra; - if (vk_ctx == nullptr) { + if (vk_transfer_ctx == nullptr) { // Initialize new transfer context - vk_ctx = ggml_vk_create_context(vk_device.transfer_queue); - ggml_vk_ctx_begin(vk_ctx); + vk_transfer_ctx = ggml_vk_create_context(vk_device.transfer_queue); + ggml_vk_ctx_begin(vk_transfer_ctx); } - ggml_vk_buffer_copy_async(vk_ctx, &src_extra->buffer_gpu, src_extra->offset, &dst_extra->buffer_gpu, dst_extra->offset, ggml_nbytes(src)); + ggml_vk_buffer_copy_async(vk_transfer_ctx, &src_extra->buffer_gpu, src_extra->offset, &dst_extra->buffer_gpu, dst_extra->offset, ggml_nbytes(src)); return true; } @@ -4530,28 +4602,28 @@ GGML_CALL static bool ggml_backend_vk_cpy_tensor_async(ggml_backend_t backend, c } GGML_CALL static void ggml_backend_vk_synchronize(ggml_backend_t backend) { -#ifdef VK_DEBUG +#ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_backend_vk_synchronize()" << std::endl; #endif - if(vk_ctx == nullptr) { + if(vk_transfer_ctx == nullptr) { return; } - ggml_vk_ctx_end(vk_ctx); + ggml_vk_ctx_end(vk_transfer_ctx); - for (auto& cpy : vk_ctx->in_memcpys) { + for (auto& cpy : vk_transfer_ctx->in_memcpys) { memcpy(cpy.dst, cpy.src, cpy.n); } - ggml_vk_submit(vk_ctx, vk_fence); + ggml_vk_submit(vk_transfer_ctx, vk_fence); VK_CHECK(vk_device.device.waitForFences({ vk_fence }, true, UINT64_MAX), "ggml_backend_vk_synchronize waitForFences"); vk_device.device.resetFences({ vk_fence 
}); - for (auto& cpy : vk_ctx->out_memcpys) { + for (auto& cpy : vk_transfer_ctx->out_memcpys) { memcpy(cpy.dst, cpy.src, cpy.n); } - vk_ctx = nullptr; + vk_transfer_ctx = nullptr; UNUSED(backend); } diff --git a/ggml_vk_generate_shaders.py b/ggml_vk_generate_shaders.py index 67981a751..4abb0383f 100644 --- a/ggml_vk_generate_shaders.py +++ b/ggml_vk_generate_shaders.py @@ -157,19 +157,10 @@ struct block_q6_K # Dequant functions shader_f16_dequant_func = """ -#define DEQUANT_FUNC f16vec2 v = f16vec2(data_a[ib + 0], data_a[ib + 1]); -""" -shader_f16_dequant_func_compat = """ #define DEQUANT_FUNC vec2 v = vec2(data_a[ib + 0], data_a[ib + 1]); """ shader_q4_0_dequant_func = """ -#define DEQUANT_FUNC const float16_t d = data_a[ib].d; \ -const uint8_t vui = data_a[ib].qs[iqs]; \ -f16vec2 v = f16vec2(vui & 0xF, vui >> 4); \ -v = (v - 8.0hf)*d; -""" -shader_q4_0_dequant_func_compat = """ #define DEQUANT_FUNC const float d = float(data_a[ib].d); \ const uint vui = uint(data_a[ib].qs[iqs]); \ vec2 v = vec2(vui & 0xF, vui >> 4); \ @@ -177,13 +168,6 @@ v = (v - 8.0f)*d; """ shader_q4_1_dequant_func = """ -#define DEQUANT_FUNC const float16_t d = data_a[ib].d; \ -const float16_t m = data_a[ib].m; \ -const uint8_t vui = data_a[ib].qs[iqs]; \ -f16vec2 v = f16vec2(vui & 0xF, vui >> 4); \ -v = v*d + m; -""" -shader_q4_1_dequant_func_compat = """ #define DEQUANT_FUNC const float d = float(data_a[ib].d); \ const float m = float(data_a[ib].m); \ const uint vui = uint(data_a[ib].qs[iqs]); \ @@ -192,14 +176,6 @@ v = v*d + m; """ shader_q5_0_dequant_func = """ -#define DEQUANT_FUNC const float16_t d = data_a[ib].d; \ -const uint uint_qh = uint(data_a[ib].qh[1]) << 16 | data_a[ib].qh[0]; \ -const ivec2 qh = ivec2(((uint_qh >> iqs) << 4) & 0x10, (uint_qh >> (iqs + 12)) & 0x10); \ -const uint8_t vui = data_a[ib].qs[iqs]; \ -f16vec2 v = f16vec2((vui & 0xF) | qh.x, (vui >> 4) | qh.y); \ -v = (v - 16.0hf) * d; -""" -shader_q5_0_dequant_func_compat = """ #define DEQUANT_FUNC const float d = 
float(data_a[ib].d); \ const uint uint_qh = uint(data_a[ib].qh[1]) << 16 | data_a[ib].qh[0]; \ const ivec2 qh = ivec2(((uint_qh >> iqs) << 4) & 0x10, (uint_qh >> (iqs + 12)) & 0x10); \ @@ -209,14 +185,6 @@ v = (v - 16.0f) * d; """ shader_q5_1_dequant_func = """ -#define DEQUANT_FUNC const float16_t d = data_a[ib].d; \ -const float16_t m = data_a[ib].m; \ -const ivec2 qh = ivec2(((data_a[ib].qh >> iqs) << 4) & 0x10, (data_a[ib].qh >> (iqs + 12)) & 0x10); \ -const uint8_t vui = data_a[ib].qs[iqs]; \ -f16vec2 v = f16vec2((vui & 0xF) | qh.x, (vui >> 4) | qh.y); \ -v = v*d + m; -""" -shader_q5_1_dequant_func_compat = """ #define DEQUANT_FUNC const float d = float(data_a[ib].d); \ const float m = float(data_a[ib].m); \ const ivec2 qh = ivec2(((data_a[ib].qh >> iqs) << 4) & 0x10, (data_a[ib].qh >> (iqs + 12)) & 0x10); \ @@ -226,11 +194,6 @@ v = v*d + m; """ shader_q8_0_dequant_func = """ -#define DEQUANT_FUNC const float16_t d = data_a[ib].d; \ -f16vec2 v = f16vec2(data_a[ib].qs[iqs], data_a[ib].qs[iqs + 1]); \ -v = v * d; -""" -shader_q8_0_dequant_func_compat = """ #define DEQUANT_FUNC const float d = float(data_a[ib].d); \ vec2 v = vec2(int(data_a[ib].qs[iqs]), int(data_a[ib].qs[iqs + 1])); \ v = v * d; @@ -2110,7 +2073,7 @@ lock = asyncio.Lock() shader_fnames = [] -async def string_to_spv(name, code, defines, fp16): +async def string_to_spv(name, code, defines, fp16=True): f = NamedTemporaryFile(mode="w", delete=False) f.write(code) f.flush() @@ -2200,64 +2163,6 @@ async def main(): tasks.append(string_to_spv("matmul_f16_f32_aligned_m", "".join(stream), {"LOAD_VEC": load_vec, "A_TYPE": vec_type_f16, "B_TYPE": vec_type, "D_TYPE": "float"}, fp16)) tasks.append(string_to_spv("matmul_f16_f32_aligned_s", "".join(stream), {"LOAD_VEC": load_vec, "A_TYPE": vec_type_f16, "B_TYPE": vec_type, "D_TYPE": "float"}, fp16)) - # Build dequant shaders - tasks.append(string_to_spv("f32_to_f16", f32_to_f16_src, {}, fp16)) - - for i in range(0, VK_NUM_TYPES): - stream.clear() - - 
stream.extend((dequant_head, shader_int8_ext, shader_float_type)) - - if i == GGML_TYPE_F16: - stream.extend((shader_f16_defines, shader_f16_dequant_func_compat if not fp16 else shader_f16_dequant_func, dequant_body)) - elif i == GGML_TYPE_Q4_0: - stream.extend((shader_q4_0_defines, shader_q4_0_dequant_func_compat if not fp16 else shader_q4_0_dequant_func, dequant_body)) - elif i == GGML_TYPE_Q4_1: - stream.extend((shader_q4_1_defines, shader_q4_1_dequant_func_compat if not fp16 else shader_q4_1_dequant_func, dequant_body)) - elif i == GGML_TYPE_Q5_0: - stream.extend((shader_q5_0_defines, shader_q5_0_dequant_func_compat if not fp16 else shader_q5_0_dequant_func, dequant_body)) - elif i == GGML_TYPE_Q5_1: - stream.extend((shader_q5_1_defines, shader_q5_1_dequant_func_compat if not fp16 else shader_q5_1_dequant_func, dequant_body)) - elif i == GGML_TYPE_Q8_0: - stream.extend((shader_q8_0_defines, shader_q8_0_dequant_func_compat if not fp16 else shader_q8_0_dequant_func, dequant_body)) - elif i == GGML_TYPE_Q2_K: - stream.extend((shader_q2_K_defines, dequant_q2_K_body)) - elif i == GGML_TYPE_Q3_K: - stream.extend((shader_q3_K_defines, dequant_q3_K_body)) - elif i == GGML_TYPE_Q4_K: - stream.extend((shader_q4_K_defines, dequant_q4_K_body)) - elif i == GGML_TYPE_Q5_K: - stream.extend((shader_q5_K_defines, dequant_q5_K_body)) - elif i == GGML_TYPE_Q6_K: - stream.extend((shader_q6_K_defines, dequant_q6_K_body)) - else: - continue - - tasks.append(string_to_spv(f"dequant_{type_names[i]}", "".join(stream), {"D_TYPE": "float16_t"}, fp16)) - - # get_rows - for i in range(0, VK_NUM_TYPES): - stream.clear() - stream.extend((generic_head, shader_int8_ext, shader_float_type)) - - if i == GGML_TYPE_F16: - stream.extend((shader_f16_defines, shader_f16_dequant_func_compat if not fp16 else shader_f16_dequant_func, get_rows_body)) - elif i == GGML_TYPE_Q4_0: - stream.extend((shader_q4_0_defines, shader_q4_0_dequant_func_compat if not fp16 else shader_q4_0_dequant_func, get_rows_body)) 
- elif i == GGML_TYPE_Q4_1: - stream.extend((shader_q4_1_defines, shader_q4_1_dequant_func_compat if not fp16 else shader_q4_1_dequant_func, get_rows_body)) - elif i == GGML_TYPE_Q5_0: - stream.extend((shader_q5_0_defines, shader_q5_0_dequant_func_compat if not fp16 else shader_q5_0_dequant_func, get_rows_body)) - elif i == GGML_TYPE_Q5_1: - stream.extend((shader_q5_1_defines, shader_q5_1_dequant_func_compat if not fp16 else shader_q5_1_dequant_func, get_rows_body)) - elif i == GGML_TYPE_Q8_0: - stream.extend((shader_q8_0_defines, shader_q8_0_dequant_func_compat if not fp16 else shader_q8_0_dequant_func, get_rows_body)) - else: - continue - - tasks.append(string_to_spv(f"get_rows_{type_names[i]}", "".join(stream), {"B_TYPE": "float", "D_TYPE": "float16_t"}, fp16)) - tasks.append(string_to_spv(f"get_rows_{type_names[i]}_f32", "".join(stream), {"B_TYPE": "float", "D_TYPE": "float"}, fp16)) - # Shaders where precision is needed, so no fp16 version # mul mat vec @@ -2266,17 +2171,17 @@ async def main(): stream.extend((mul_mat_vec_head, shader_int8_ext, shader_f32)) if i == GGML_TYPE_F16: - stream.extend((shader_f16_defines, shader_f16_dequant_func_compat, mul_mat_vec_body)) + stream.extend((shader_f16_defines, shader_f16_dequant_func, mul_mat_vec_body)) elif i == GGML_TYPE_Q4_0: - stream.extend((shader_q4_0_defines, shader_q4_0_dequant_func_compat, mul_mat_vec_body)) + stream.extend((shader_q4_0_defines, shader_q4_0_dequant_func, mul_mat_vec_body)) elif i == GGML_TYPE_Q4_1: - stream.extend((shader_q4_1_defines, shader_q4_1_dequant_func_compat, mul_mat_vec_body)) + stream.extend((shader_q4_1_defines, shader_q4_1_dequant_func, mul_mat_vec_body)) elif i == GGML_TYPE_Q5_0: - stream.extend((shader_q5_0_defines, shader_q5_0_dequant_func_compat, mul_mat_vec_body)) + stream.extend((shader_q5_0_defines, shader_q5_0_dequant_func, mul_mat_vec_body)) elif i == GGML_TYPE_Q5_1: - stream.extend((shader_q5_1_defines, shader_q5_1_dequant_func_compat, mul_mat_vec_body)) + 
stream.extend((shader_q5_1_defines, shader_q5_1_dequant_func, mul_mat_vec_body)) elif i == GGML_TYPE_Q8_0: - stream.extend((shader_q8_0_defines, shader_q8_0_dequant_func_compat, mul_mat_vec_body)) + stream.extend((shader_q8_0_defines, shader_q8_0_dequant_func, mul_mat_vec_body)) elif i == GGML_TYPE_Q2_K: stream.extend((shader_q2_K_defines, mul_mat_vec_q2_K_body)) elif i == GGML_TYPE_Q3_K: @@ -2290,43 +2195,101 @@ async def main(): else: continue - tasks.append(string_to_spv(f"mul_mat_vec_{type_names[i]}_f32", "".join(stream), {"B_TYPE": "float", "D_TYPE": "float", "K_QUANTS_PER_ITERATION": K_QUANTS_PER_ITERATION}, fp16)) + tasks.append(string_to_spv(f"mul_mat_vec_{type_names[i]}_f32", "".join(stream), {"B_TYPE": "float", "D_TYPE": "float", "K_QUANTS_PER_ITERATION": K_QUANTS_PER_ITERATION})) - tasks.append(string_to_spv("mul_mat_vec_p021_f16_f32", mul_mat_p021_src, {"A_TYPE": "float16_t", "B_TYPE": "float", "D_TYPE": "float"}, True)) - tasks.append(string_to_spv("mul_mat_vec_nc_f16_f32", mul_mat_nc_src, {"A_TYPE": "float16_t", "B_TYPE": "float", "D_TYPE": "float"}, True)) + # Dequant shaders + for i in range(0, VK_NUM_TYPES): + stream.clear() + + stream.extend((dequant_head, shader_int8_ext, shader_f32)) + + if i == GGML_TYPE_F16: + stream.extend((shader_f16_defines, shader_f16_dequant_func, dequant_body)) + elif i == GGML_TYPE_Q4_0: + stream.extend((shader_q4_0_defines, shader_q4_0_dequant_func, dequant_body)) + elif i == GGML_TYPE_Q4_1: + stream.extend((shader_q4_1_defines, shader_q4_1_dequant_func, dequant_body)) + elif i == GGML_TYPE_Q5_0: + stream.extend((shader_q5_0_defines, shader_q5_0_dequant_func, dequant_body)) + elif i == GGML_TYPE_Q5_1: + stream.extend((shader_q5_1_defines, shader_q5_1_dequant_func, dequant_body)) + elif i == GGML_TYPE_Q8_0: + stream.extend((shader_q8_0_defines, shader_q8_0_dequant_func, dequant_body)) + elif i == GGML_TYPE_Q2_K: + stream.extend((shader_q2_K_defines, dequant_q2_K_body)) + elif i == GGML_TYPE_Q3_K: + 
stream.extend((shader_q3_K_defines, dequant_q3_K_body)) + elif i == GGML_TYPE_Q4_K: + stream.extend((shader_q4_K_defines, dequant_q4_K_body)) + elif i == GGML_TYPE_Q5_K: + stream.extend((shader_q5_K_defines, dequant_q5_K_body)) + elif i == GGML_TYPE_Q6_K: + stream.extend((shader_q6_K_defines, dequant_q6_K_body)) + else: + continue + + tasks.append(string_to_spv(f"dequant_{type_names[i]}", "".join(stream), {"D_TYPE": "float16_t"})) + + tasks.append(string_to_spv("f32_to_f16", f32_to_f16_src, {})) + + # get_rows + for i in range(0, VK_NUM_TYPES): + stream.clear() + stream.extend((generic_head, shader_int8_ext, shader_f32)) + + if i == GGML_TYPE_F16: + stream.extend((shader_f16_defines, shader_f16_dequant_func, get_rows_body)) + elif i == GGML_TYPE_Q4_0: + stream.extend((shader_q4_0_defines, shader_q4_0_dequant_func, get_rows_body)) + elif i == GGML_TYPE_Q4_1: + stream.extend((shader_q4_1_defines, shader_q4_1_dequant_func, get_rows_body)) + elif i == GGML_TYPE_Q5_0: + stream.extend((shader_q5_0_defines, shader_q5_0_dequant_func, get_rows_body)) + elif i == GGML_TYPE_Q5_1: + stream.extend((shader_q5_1_defines, shader_q5_1_dequant_func, get_rows_body)) + elif i == GGML_TYPE_Q8_0: + stream.extend((shader_q8_0_defines, shader_q8_0_dequant_func, get_rows_body)) + else: + continue + + tasks.append(string_to_spv(f"get_rows_{type_names[i]}", "".join(stream), {"B_TYPE": "float", "D_TYPE": "float16_t"})) + tasks.append(string_to_spv(f"get_rows_{type_names[i]}_f32", "".join(stream), {"B_TYPE": "float", "D_TYPE": "float"})) + + tasks.append(string_to_spv("mul_mat_vec_p021_f16_f32", mul_mat_p021_src, {"A_TYPE": "float16_t", "B_TYPE": "float", "D_TYPE": "float"})) + tasks.append(string_to_spv("mul_mat_vec_nc_f16_f32", mul_mat_nc_src, {"A_TYPE": "float16_t", "B_TYPE": "float", "D_TYPE": "float"})) # Norms - tasks.append(string_to_spv("norm_f32", f"{generic_head}\n{shader_f32}\n{norm_body}", {"A_TYPE": "float", "D_TYPE": "float"}, True)) - tasks.append(string_to_spv("rms_norm_f32", 
f"{generic_head}\n{shader_f32}\n{rms_norm_body}", {"A_TYPE": "float", "D_TYPE": "float"}, True)) + tasks.append(string_to_spv("norm_f32", f"{generic_head}\n{shader_f32}\n{norm_body}", {"A_TYPE": "float", "D_TYPE": "float"})) + tasks.append(string_to_spv("rms_norm_f32", f"{generic_head}\n{shader_f32}\n{rms_norm_body}", {"A_TYPE": "float", "D_TYPE": "float"})) - tasks.append(string_to_spv("cpy_f32_f32", f"{cpy_src}\n{cpy_end}", {"A_TYPE": "float", "D_TYPE": "float"}, True)) - tasks.append(string_to_spv("cpy_f32_f16", f"{cpy_src}\n{cpy_end}", {"A_TYPE": "float", "D_TYPE": "float16_t"}, True)) - tasks.append(string_to_spv("cpy_f16_f16", f"{cpy_src}\n{cpy_f16_f16_end}", {"A_TYPE": "float16_t", "D_TYPE": "float16_t"}, True)) + tasks.append(string_to_spv("cpy_f32_f32", f"{cpy_src}\n{cpy_end}", {"A_TYPE": "float", "D_TYPE": "float"})) + tasks.append(string_to_spv("cpy_f32_f16", f"{cpy_src}\n{cpy_end}", {"A_TYPE": "float", "D_TYPE": "float16_t"})) + tasks.append(string_to_spv("cpy_f16_f16", f"{cpy_src}\n{cpy_f16_f16_end}", {"A_TYPE": "float16_t", "D_TYPE": "float16_t"})) - tasks.append(string_to_spv("add_f32", f"{generic_head}\n{shader_f32}\n{add_body}", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float"}, True)) + tasks.append(string_to_spv("add_f32", f"{generic_head}\n{shader_f32}\n{add_body}", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float"})) - tasks.append(string_to_spv("split_k_reduce", mulmat_split_k_reduce_src, {}, True)) - tasks.append(string_to_spv("mul_f32", f"{generic_head}\n{shader_f32}\n{mul_body}", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float"}, True)) + tasks.append(string_to_spv("split_k_reduce", mulmat_split_k_reduce_src, {})) + tasks.append(string_to_spv("mul_f32", f"{generic_head}\n{shader_f32}\n{mul_body}", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float"})) - tasks.append(string_to_spv("scale_f32", f"{generic_head}\n{shader_f32}\n{scale_body}", {"A_TYPE": "float", "D_TYPE": "float"}, True)) + 
tasks.append(string_to_spv("scale_f32", f"{generic_head}\n{shader_f32}\n{scale_body}", {"A_TYPE": "float", "D_TYPE": "float"})) - tasks.append(string_to_spv("sqr_f32", f"{generic_head}\n{shader_f32}\n{sqr_body}", {"A_TYPE": "float", "D_TYPE": "float"}, True)) + tasks.append(string_to_spv("sqr_f32", f"{generic_head}\n{shader_f32}\n{sqr_body}", {"A_TYPE": "float", "D_TYPE": "float"})) - tasks.append(string_to_spv("clamp_f32", f"{generic_head}\n{shader_f32}\n{clamp_body}", {"A_TYPE": "float", "D_TYPE": "float"}, True)) + tasks.append(string_to_spv("clamp_f32", f"{generic_head}\n{shader_f32}\n{clamp_body}", {"A_TYPE": "float", "D_TYPE": "float"})) - tasks.append(string_to_spv("gelu_f32", f"{generic_head}\n{shader_f32}\n{gelu_body}", {"A_TYPE": "float", "D_TYPE": "float"}, True)) - tasks.append(string_to_spv("silu_f32", f"{generic_head}\n{shader_f32}\n{silu_body}", {"A_TYPE": "float", "D_TYPE": "float"}, True)) - tasks.append(string_to_spv("relu_f32", f"{generic_head}\n{shader_f32}\n{relu_body}", {"A_TYPE": "float", "D_TYPE": "float"}, True)) + tasks.append(string_to_spv("gelu_f32", f"{generic_head}\n{shader_f32}\n{gelu_body}", {"A_TYPE": "float", "D_TYPE": "float"})) + tasks.append(string_to_spv("silu_f32", f"{generic_head}\n{shader_f32}\n{silu_body}", {"A_TYPE": "float", "D_TYPE": "float"})) + tasks.append(string_to_spv("relu_f32", f"{generic_head}\n{shader_f32}\n{relu_body}", {"A_TYPE": "float", "D_TYPE": "float"})) - tasks.append(string_to_spv("diag_mask_inf_f32", f"{diag_mask_inf_head}\n{shader_f32}\n{diag_mask_inf_body}", {"A_TYPE": "float", "D_TYPE": "float"}, True)) + tasks.append(string_to_spv("diag_mask_inf_f32", f"{diag_mask_inf_head}\n{shader_f32}\n{diag_mask_inf_body}", {"A_TYPE": "float", "D_TYPE": "float"})) - tasks.append(string_to_spv("soft_max_f32", f"{generic_head}\n{shader_f32}\n{soft_max_body}", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float"}, True)) + tasks.append(string_to_spv("soft_max_f32", 
f"{generic_head}\n{shader_f32}\n{soft_max_body}", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float"})) - tasks.append(string_to_spv("rope_f32", rope_src, {"A_TYPE": "float", "D_TYPE": "float"}, True)) - tasks.append(string_to_spv("rope_f16", rope_src, {"A_TYPE": "float16_t", "D_TYPE": "float16_t"}, True)) + tasks.append(string_to_spv("rope_f32", rope_src, {"A_TYPE": "float", "D_TYPE": "float"})) + tasks.append(string_to_spv("rope_f16", rope_src, {"A_TYPE": "float16_t", "D_TYPE": "float16_t"})) - tasks.append(string_to_spv("rope_neox_f32", rope_neox_src, {"A_TYPE": "float", "D_TYPE": "float"}, True)) - tasks.append(string_to_spv("rope_neox_f16", rope_neox_src, {"A_TYPE": "float16_t", "D_TYPE": "float16_t"}, True)) + tasks.append(string_to_spv("rope_neox_f32", rope_neox_src, {"A_TYPE": "float", "D_TYPE": "float"})) + tasks.append(string_to_spv("rope_neox_f16", rope_neox_src, {"A_TYPE": "float16_t", "D_TYPE": "float16_t"})) await asyncio.gather(*tasks) From 60ecf099eddfe70fec797ef6790572e452054add Mon Sep 17 00:00:00 2001 From: Martin Schwaighofer Date: Sun, 28 Jan 2024 12:59:43 +0100 Subject: [PATCH 16/94] add Vulkan support to Nix flake --- .devops/nix/package.nix | 21 +++++++++++++++++---- flake.nix | 1 + 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index a868a9a61..ad23f7dd7 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -13,18 +13,22 @@ cudaPackages, darwin, rocmPackages, + vulkan-headers, + vulkan-loader, clblast, useBlas ? builtins.all (x: !x) [ useCuda useMetalKit useOpenCL useRocm + useVulkan ], useCuda ? config.cudaSupport, useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL, useMpi ? false, # Increases the runtime closure size by ~700M useOpenCL ? false, useRocm ? config.rocmSupport, + useVulkan ? false, llamaVersion ? 
"0.0.0", # Arbitrary version, substituted by the flake }@inputs: @@ -48,7 +52,8 @@ let ++ lib.optionals useMetalKit [ "MetalKit" ] ++ lib.optionals useMpi [ "MPI" ] ++ lib.optionals useOpenCL [ "OpenCL" ] - ++ lib.optionals useRocm [ "ROCm" ]; + ++ lib.optionals useRocm [ "ROCm" ] + ++ lib.optionals useVulkan [ "Vulkan" ]; pnameSuffix = strings.optionalString (suffices != [ ]) @@ -108,6 +113,11 @@ let hipblas rocblas ]; + + vulkanBuildInputs = [ + vulkan-headers + vulkan-loader + ]; in effectiveStdenv.mkDerivation ( @@ -164,7 +174,8 @@ effectiveStdenv.mkDerivation ( ++ optionals useCuda cudaBuildInputs ++ optionals useMpi [ mpi ] ++ optionals useOpenCL [ clblast ] - ++ optionals useRocm rocmBuildInputs; + ++ optionals useRocm rocmBuildInputs + ++ optionals useVulkan vulkanBuildInputs; cmakeFlags = [ @@ -178,6 +189,7 @@ effectiveStdenv.mkDerivation ( (cmakeBool "LLAMA_HIPBLAS" useRocm) (cmakeBool "LLAMA_METAL" useMetalKit) (cmakeBool "LLAMA_MPI" useMpi) + (cmakeBool "LLAMA_VULKAN" useVulkan) ] ++ optionals useCuda [ ( @@ -218,6 +230,7 @@ effectiveStdenv.mkDerivation ( useMpi useOpenCL useRocm + useVulkan ; shell = mkShell { @@ -242,11 +255,11 @@ effectiveStdenv.mkDerivation ( # Configurations we don't want even the CI to evaluate. Results in the # "unsupported platform" messages. This is mostly a no-op, because # cudaPackages would've refused to evaluate anyway. - badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin; + badPlatforms = optionals (useCuda || useOpenCL || useVulkan) lib.platforms.darwin; # Configurations that are known to result in build failures. Can be # overridden by importing Nixpkgs with `allowBroken = true`. 
- broken = (useMetalKit && !effectiveStdenv.isDarwin); + broken = (useMetalKit && !effectiveStdenv.isDarwin) || (useVulkan && effectiveStdenv.isDarwin); description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}"; homepage = "https://github.com/ggerganov/llama.cpp/"; diff --git a/flake.nix b/flake.nix index a776ba024..ad2f9b295 100644 --- a/flake.nix +++ b/flake.nix @@ -157,6 +157,7 @@ mpi-cpu = config.packages.default.override { useMpi = true; }; mpi-cuda = config.packages.default.override { useMpi = true; }; + vulkan = config.packages.default.override { useVulkan = true; }; } // lib.optionalAttrs (system == "x86_64-linux") { rocm = config.legacyPackages.llamaPackagesRocm.llama-cpp; From 3cc5ed353c07201d8d5b98b0a4713ab633da6d04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Sat, 3 Feb 2024 20:14:59 +0100 Subject: [PATCH 17/94] make: fix nvcc optimization flags for host code (#5309) --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a55d15888..40b16e0ea 100644 --- a/Makefile +++ b/Makefile @@ -109,6 +109,7 @@ MK_NVCCFLAGS += -O3 else MK_CFLAGS += -O3 MK_CXXFLAGS += -O3 +MK_NVCCFLAGS += -O3 endif # clock_gettime came in POSIX.1b (1993) @@ -365,7 +366,7 @@ ifdef LLAMA_CUBLAS MK_CPPFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib OBJS += ggml-cuda.o - MK_NVCCFLAGS = -use_fast_math + MK_NVCCFLAGS += -use_fast_math ifndef JETSON_EOL_MODULE_DETECT MK_NVCCFLAGS += --forward-unknown-to-host-compiler endif # JETSON_EOL_MODULE_DETECT From 3c0d25c4756742ebf15ad44700fabc0700c638bd Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Sat, 3 Feb 2024 20:15:13 +0100 Subject: [PATCH 18/94] make: add nvcc info print (#5310) --- Makefile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 40b16e0ea..21d5e15ba 100644 --- a/Makefile +++ b/Makefile @@ -553,8 +553,11 @@ $(info I CFLAGS: $(CFLAGS)) $(info I CXXFLAGS: $(CXXFLAGS)) $(info I NVCCFLAGS: $(NVCCFLAGS)) $(info I LDFLAGS: $(LDFLAGS)) -$(info I CC: $(shell $(CC) --version | head -n 1)) -$(info I CXX: $(shell $(CXX) --version | head -n 1)) +$(info I CC: $(shell $(CC) --version | head -n 1)) +$(info I CXX: $(shell $(CXX) --version | head -n 1)) +ifdef LLAMA_CUBLAS +$(info I NVCC: $(shell $(NVCC) --version | tail -n 1)) +endif # LLAMA_CUBLAS $(info ) # From 277fad30c60ef3559dc2d01b19d05e659d40a824 Mon Sep 17 00:00:00 2001 From: Welby Seely Date: Sat, 3 Feb 2024 23:18:51 -0500 Subject: [PATCH 19/94] cmake : use set() for LLAMA_WIN_VER (#5298) option() is specifically for booleans. 
Fixes #5158 --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c156c4824..8c04e4c19 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -79,7 +79,7 @@ if (NOT MSVC) endif() if (WIN32) - option(LLAMA_WIN_VER "llama: Windows Version" 0x602) + set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version") endif() # 3rd party libs From 5ed26e1fc9fab4ce96ecf2d84183fe45bdcab0d4 Mon Sep 17 00:00:00 2001 From: Kawrakow <48489457+ikawrakow@users.noreply.github.com> Date: Sun, 4 Feb 2024 10:39:58 +0200 Subject: [PATCH 20/94] Adding some imatrix tools (#5302) * imatrix: adding --combine and --continue-from * imatrix: be able to start from a specific chunk --------- Co-authored-by: Iwan Kawrakow --- examples/imatrix/imatrix.cpp | 116 +++++++++++++++++++++++++++++++++-- 1 file changed, 112 insertions(+), 4 deletions(-) diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index ea06fcdbf..bc9f6fa68 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -36,6 +36,8 @@ public: void set_parameters(StatParams&& params) { m_params = std::move(params); } bool collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data); void save_imatrix() const; + bool load_imatrix(const char * file_name, bool add); + static bool load_imatrix(const char * file_name, std::unordered_map& imatrix); private: std::unordered_map m_stats; StatParams m_params; @@ -189,6 +191,57 @@ void IMatrixCollector::save_imatrix(const char * fname) const { } } +bool IMatrixCollector::load_imatrix(const char * imatrix_file, std::unordered_map& imatrix_data) { + std::ifstream in(imatrix_file, std::ios::binary); + if (!in) { + printf("%s: failed to open %s\n",__func__,imatrix_file); + return false; + } + int n_entries; + in.read((char*)&n_entries, sizeof(n_entries)); + if (in.fail() || n_entries < 1) { + printf("%s: no data in file %s\n", __func__, imatrix_file); + return false; + } + for (int i = 0; 
i < n_entries; ++i) { + int len; in.read((char *)&len, sizeof(len)); + std::vector name_as_vec(len+1); + in.read((char *)name_as_vec.data(), len); + if (in.fail()) { + printf("%s: failed reading name for entry %d from %s\n",__func__,i+1,imatrix_file); + return false; + } + name_as_vec[len] = 0; + std::string name{name_as_vec.data()}; + auto& e = imatrix_data[std::move(name)]; + int ncall; + in.read((char*)&ncall, sizeof(ncall)); + int nval; + in.read((char *)&nval, sizeof(nval)); + if (in.fail() || nval < 1) { + printf("%s: failed reading number of values for entry %d\n",__func__,i); + imatrix_data = {}; + return false; + } + e.values.resize(nval); + in.read((char*)e.values.data(), nval*sizeof(float)); + if (in.fail()) { + printf("%s: failed reading data for entry %d\n",__func__,i); + imatrix_data = {}; + return false; + } + e.ncall = ncall; + } + return true; +} + +bool IMatrixCollector::load_imatrix(const char * file_name, bool add) { + if (!add) { + m_stats.clear(); + } + return load_imatrix(file_name, m_stats); +} + static IMatrixCollector g_collector; static bool ik_collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data) { @@ -269,7 +322,7 @@ static void process_logits( } } -static bool compute_imatrix(llama_context * ctx, const gpt_params & params, bool compute_ppl) { +static bool compute_imatrix(llama_context * ctx, const gpt_params & params, bool compute_ppl, int from_chunk) { const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx)); const int n_ctx = llama_n_ctx(ctx); @@ -282,6 +335,15 @@ static bool compute_imatrix(llama_context * ctx, const gpt_params & params, bool auto tim2 = std::chrono::high_resolution_clock::now(); fprintf(stderr, "%s: tokenization took %g ms\n",__func__,1e-3*std::chrono::duration_cast(tim2-tim1).count()); + if (from_chunk > 0) { + if (size_t((from_chunk + 2)*n_ctx) >= tokens.size()) { + fprintf(stderr, "%s: there will be not enough tokens left after removing %d chunks\n", __func__, from_chunk); + return 
false; + } + fprintf(stderr, "%s: removing initial %d chunks (%d tokens)\n", __func__, from_chunk, from_chunk*n_ctx); + tokens.erase(tokens.begin(), tokens.begin() + from_chunk*n_ctx); + } + if (int(tokens.size()) < 2*n_ctx) { fprintf(stderr, "%s: you need at least %d tokens for a context of %d tokens\n",__func__,2*n_ctx, n_ctx); @@ -402,7 +464,10 @@ static bool compute_imatrix(llama_context * ctx, const gpt_params & params, bool int main(int argc, char ** argv) { StatParams sparams; + std::string prev_result_file; + std::string combine_files; bool compute_ppl = true; + int from_chunk = 0; std::vector args; args.push_back(argv[0]); int iarg = 1; @@ -423,6 +488,13 @@ int main(int argc, char ** argv) { compute_ppl = false; } else if (arg == "--keep-imatrix") { sparams.keep_every = std::stoi(argv[++iarg]); + } else if (arg == "--continue-from") { + prev_result_file = argv[++iarg]; + } else if (arg == "--combine") { + combine_files = argv[++iarg]; + } + else if (arg == "--from-chunk") { + from_chunk = std::stoi(argv[++iarg]); } else { args.push_back(argv[iarg]); } @@ -436,14 +508,50 @@ int main(int argc, char ** argv) { } } + g_collector.set_parameters(std::move(sparams)); + + if (!combine_files.empty()) { + std::vector files; + size_t pos = 0; + while (true) { + auto new_pos = combine_files.find(',', pos); + if (new_pos != std::string::npos) { + files.emplace_back(combine_files.substr(pos, new_pos - pos)); + pos = new_pos + 1; + } else { + files.emplace_back(combine_files.substr(pos)); + break; + } + } + if (files.size() < 2) { + fprintf(stderr, "You must provide at least two comma separated files to use --combine\n"); + return 1; + } + printf("Combining the following %d files\n", int(files.size())); + for (auto& file : files) { + printf(" %s\n", file.c_str()); + if (!g_collector.load_imatrix(file.c_str(), true)) { + fprintf(stderr, "Failed to load %s\n", file.c_str()); + return 1; + } + } + g_collector.save_imatrix(); + return 0; + } + + if 
(!prev_result_file.empty()) { + if (!g_collector.load_imatrix(prev_result_file.c_str(), false)) { + fprintf(stderr, "=============== Failed to load %s\n", prev_result_file.c_str()); + return 1; + } + } + gpt_params params; params.n_batch = 512; if (!gpt_params_parse(args.size(), args.data(), params)) { return 1; } - g_collector.set_parameters(std::move(sparams)); - params.logits_all = true; params.n_batch = std::min(params.n_batch, params.n_ctx); @@ -495,7 +603,7 @@ int main(int argc, char ** argv) { fprintf(stderr, "%s\n", get_system_info(params).c_str()); } - bool OK = compute_imatrix(ctx, params, compute_ppl); + bool OK = compute_imatrix(ctx, params, compute_ppl, from_chunk); if (!OK) { return 1; } From 9392ebd49ea5ae236a55b47cbf6a13247e8a3b8c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 4 Feb 2024 00:17:24 +0000 Subject: [PATCH 21/94] flake.lock: Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flake lock file updates: • Updated input 'flake-parts': 'github:hercules-ci/flake-parts/07f6395285469419cf9d078f59b5b49993198c00' (2024-01-11) → 'github:hercules-ci/flake-parts/b253292d9c0a5ead9bc98c4e9a26c6312e27d69f' (2024-02-01) • Updated input 'flake-parts/nixpkgs-lib': 'github:NixOS/nixpkgs/b0d36bd0a420ecee3bc916c91886caca87c894e9?dir=lib' (2023-12-30) → 'github:NixOS/nixpkgs/97b17f32362e475016f942bbdfda4a4a72a8a652?dir=lib' (2024-01-29) • Updated input 'nixpkgs': 'github:NixOS/nixpkgs/ae5c332cbb5827f6b1f02572496b141021de335f' (2024-01-25) → 'github:NixOS/nixpkgs/b8b232ae7b8b144397fdb12d20f592e5e7c1a64d' (2024-01-31) --- flake.lock | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/flake.lock b/flake.lock index 95e41f333..8cfc78273 100644 --- a/flake.lock +++ b/flake.lock @@ -5,11 +5,11 @@ "nixpkgs-lib": "nixpkgs-lib" }, "locked": { - "lastModified": 1704982712, - "narHash": "sha256-2Ptt+9h8dczgle2Oo6z5ni5rt/uLMG47UFTR1ry/wgg=", + "lastModified": 1706830856, + 
"narHash": "sha256-a0NYyp+h9hlb7ddVz4LUn1vT/PLwqfrWYcHMvFB1xYg=", "owner": "hercules-ci", "repo": "flake-parts", - "rev": "07f6395285469419cf9d078f59b5b49993198c00", + "rev": "b253292d9c0a5ead9bc98c4e9a26c6312e27d69f", "type": "github" }, "original": { @@ -20,11 +20,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1706191920, - "narHash": "sha256-eLihrZAPZX0R6RyM5fYAWeKVNuQPYjAkCUBr+JNvtdE=", + "lastModified": 1706732774, + "narHash": "sha256-hqJlyJk4MRpcItGYMF+3uHe8HvxNETWvlGtLuVpqLU0=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "ae5c332cbb5827f6b1f02572496b141021de335f", + "rev": "b8b232ae7b8b144397fdb12d20f592e5e7c1a64d", "type": "github" }, "original": { @@ -37,11 +37,11 @@ "nixpkgs-lib": { "locked": { "dir": "lib", - "lastModified": 1703961334, - "narHash": "sha256-M1mV/Cq+pgjk0rt6VxoyyD+O8cOUiai8t9Q6Yyq4noY=", + "lastModified": 1706550542, + "narHash": "sha256-UcsnCG6wx++23yeER4Hg18CXWbgNpqNXcHIo5/1Y+hc=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "b0d36bd0a420ecee3bc916c91886caca87c894e9", + "rev": "97b17f32362e475016f942bbdfda4a4a72a8a652", "type": "github" }, "original": { From 4833ac209da6a427de64f97e8f403dcdc5de6bc3 Mon Sep 17 00:00:00 2001 From: AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com> Date: Mon, 5 Feb 2024 07:08:24 +0000 Subject: [PATCH 22/94] [SYCL] Fix cpy with dims of 3 (#5289) * Fix cpy with dims of 3 * rm asserts --------- Co-authored-by: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> --- ggml-sycl.cpp | 194 +++++++++++++++++++++++++++++--------------------- 1 file changed, 114 insertions(+), 80 deletions(-) diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index 51445b5e7..a03df4c65 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -7693,6 +7693,13 @@ static void cpy_1_f16_f16(const char * cxi, char * cdsti) { *dsti = *xi; } +static void cpy_1_f16_f32(const char * cxi, char * cdsti) { + const sycl::half *xi = (const sycl::half *)cxi; + float *dsti = (float *)cdsti; + + *dsti = *xi; +} + static void 
cpy_1_i16_i16(const char * cxi, char * cdsti) { const int16_t *xi = (const int16_t *)cxi; int16_t *dsti = (int16_t *)cdsti; @@ -7709,9 +7716,9 @@ static void cpy_1_i32_i32(const char * cxi, char * cdsti) { template static void cpy_f32_f16(const char * cx, char * cdst, const int ne, - const int ne00, const int ne01, const int nb00, const int nb01, const int nb02, - const int ne10, const int ne11, const int nb10, const int nb11, const int nb12, - const sycl::nd_item<3> &item_ct1) { + const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02, + const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11, + const int nb12, const int nb13, const sycl::nd_item<3> &item_ct1) { const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) + item_ct1.get_local_id(2); @@ -7721,15 +7728,17 @@ static void cpy_f32_f16(const char * cx, char * cdst, const int ne, // determine indices i02/i12, i01/i11, i00/i10 as a function of index i of flattened tensor // then combine those indices with the corresponding byte offsets to get the total offsets - const int i02 = i / (ne00*ne01); - const int i01 = (i - i02*ne01*ne00) / ne00; - const int i00 = i - i02*ne01*ne00 - i01*ne00; - const int x_offset = i00*nb00 + i01*nb01 + i02*nb02; + const int i03 = i/(ne00 * ne01 * ne02); + const int i02 = (i - i03*ne00*ne01*ne02 )/ (ne00*ne01); + const int i01 = (i - i03*ne00*ne01*ne02 - i02*ne01*ne00) / ne00; + const int i00 = i - i03*ne00*ne01*ne02 - i02*ne01*ne00 - i01*ne00; + const int x_offset = i00*nb00 + i01*nb01 + i02*nb02 + i03 * nb03; - const int i12 = i / (ne10*ne11); - const int i11 = (i - i12*ne10*ne11) / ne10; - const int i10 = i - i12*ne10*ne11 - i11*ne10; - const int dst_offset = i10*nb10 + i11*nb11 + i12*nb12; + const int i13 = i/(ne10 * ne11 * ne12); + const int i12 = (i - i13*ne10*ne11*ne12) / (ne10*ne11); + const int i11 = (i - i13*ne10*ne11*ne12 - i12*ne10*ne11) / ne10; + const int i10 = i - 
i13*ne10*ne11*ne12 - i12*ne10*ne11 - i11*ne10; + const int dst_offset = i10*nb10 + i11*nb11 + i12*nb12 + i13 * nb13; cpy_1(cx + x_offset, cdst + dst_offset); } @@ -7823,9 +7832,9 @@ static void cpy_blck_f32_q4_1(const char * cxi, char * cdsti) { template static void cpy_f32_q(const char * cx, char * cdst, const int ne, - const int ne00, const int ne01, const int nb00, const int nb01, const int nb02, - const int ne10, const int ne11, const int nb10, const int nb11, const int nb12, - const sycl::nd_item<3> &item_ct1) { + const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02, + const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11, + const int nb12, const int nb13, const sycl::nd_item<3> &item_ct1) { const int i = (item_ct1.get_local_range(2) * item_ct1.get_group(2) + item_ct1.get_local_id(2)) * qk; @@ -7834,15 +7843,17 @@ static void cpy_f32_q(const char * cx, char * cdst, const int ne, return; } - const int i02 = i / (ne00*ne01); - const int i01 = (i - i02*ne01*ne00) / ne00; - const int i00 = (i - i02*ne01*ne00 - i01*ne00); - const int x_offset = i00*nb00 + i01*nb01 + i02*nb02; + const int i03 = i/(ne00 * ne01 * ne02); + const int i02 = (i - i03*ne00*ne01*ne02 )/ (ne00*ne01); + const int i01 = (i - i03*ne00*ne01*ne02 - i02*ne01*ne00) / ne00; + const int i00 = i - i03*ne00*ne01*ne02 - i02*ne01*ne00 - i01*ne00; + const int x_offset = i00*nb00 + i01*nb01 + i02*nb02 + i03 * nb03; - const int i12 = i / (ne10*ne11); - const int i11 = (i - i12*ne10*ne11) / ne10; - const int i10 = (i - i12*ne10*ne11 - i11*ne10)/qk; - const int dst_offset = i10*nb10 + i11*nb11 + i12*nb12; + const int i13 = i/(ne10 * ne11 * ne12); + const int i12 = (i - i13*ne10*ne11*ne12) / (ne10*ne11); + const int i11 = (i - i13*ne10*ne11*ne12 - i12*ne10*ne11) / ne10; + const int i10 = i - i13*ne10*ne11*ne12 - i12*ne10*ne11 - i11*ne10; + const int dst_offset = (i10/qk)*nb10 + i11*nb11 + i12*nb12 + i13*nb13; cpy_blck(cx + 
x_offset, cdst + dst_offset); } @@ -10599,10 +10610,12 @@ static void ggml_mul_mat_vec_nc_f16_f32_sycl( static void ggml_cpy_f32_f32_sycl(const char *cx, char *cdst, const int ne, const int ne00, const int ne01, - const int nb00, const int nb01, - const int nb02, const int ne10, - const int ne11, const int nb10, - const int nb11, const int nb12, + const int ne02, const int nb00, + const int nb01, const int nb02, + const int nb03, const int ne10, + const int ne11, const int ne12, + const int nb10, const int nb11, + const int nb12, const int nb13, dpct::queue_ptr stream) { const int num_blocks = (ne + SYCL_CPY_BLOCK_SIZE - 1) / SYCL_CPY_BLOCK_SIZE; @@ -10615,8 +10628,8 @@ static void ggml_cpy_f32_f32_sycl(const char *cx, char *cdst, const int ne, sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE), sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)), [=](sycl::nd_item<3> item_ct1) { - cpy_f32_f16(cx, cdst, ne, ne00, ne01, nb00, nb01, - nb02, ne10, ne11, nb10, nb11, nb12, + cpy_f32_f16(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, + nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); }); } @@ -10624,10 +10637,12 @@ static void ggml_cpy_f32_f32_sycl(const char *cx, char *cdst, const int ne, static void ggml_cpy_f32_f16_sycl(const char *cx, char *cdst, const int ne, const int ne00, const int ne01, - const int nb00, const int nb01, - const int nb02, const int ne10, - const int ne11, const int nb10, - const int nb11, const int nb12, + const int ne02, const int nb00, + const int nb01, const int nb02, + const int nb03, const int ne10, + const int ne11, const int ne12, + const int nb10, const int nb11, + const int nb12, const int nb13, dpct::queue_ptr stream) { const int num_blocks = (ne + SYCL_CPY_BLOCK_SIZE - 1) / SYCL_CPY_BLOCK_SIZE; @@ -10640,8 +10655,8 @@ static void ggml_cpy_f32_f16_sycl(const char *cx, char *cdst, const int ne, sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE), sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)), [=](sycl::nd_item<3> item_ct1) { - cpy_f32_f16(cx, cdst, ne, 
ne00, ne01, nb00, nb01, - nb02, ne10, ne11, nb10, nb11, nb12, + cpy_f32_f16(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, + nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); }); } @@ -10649,10 +10664,12 @@ static void ggml_cpy_f32_f16_sycl(const char *cx, char *cdst, const int ne, static void ggml_cpy_f32_q8_0_sycl(const char *cx, char *cdst, const int ne, const int ne00, const int ne01, - const int nb00, const int nb01, - const int nb02, const int ne10, - const int ne11, const int nb10, - const int nb11, const int nb12, + const int ne02, const int nb00, + const int nb01, const int nb02, + const int nb03, const int ne10, + const int ne11, const int ne12, + const int nb10, const int nb11, + const int nb12, const int nb13, dpct::queue_ptr stream) { GGML_ASSERT(ne % QK8_0 == 0); @@ -10661,17 +10678,20 @@ static void ggml_cpy_f32_q8_0_sycl(const char *cx, char *cdst, const int ne, sycl::range<3>(1, 1, 1)), [=](sycl::nd_item<3> item_ct1) { cpy_f32_q( - cx, cdst, ne, ne00, ne01, nb00, nb01, nb02, - ne10, ne11, nb10, nb11, nb12, item_ct1); + cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, + nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, + item_ct1); }); } static void ggml_cpy_f32_q4_0_sycl(const char *cx, char *cdst, const int ne, const int ne00, const int ne01, - const int nb00, const int nb01, - const int nb02, const int ne10, - const int ne11, const int nb10, - const int nb11, const int nb12, + const int ne02, const int nb00, + const int nb01, const int nb02, + const int nb03, const int ne10, + const int ne11, const int ne12, + const int nb10, const int nb11, + const int nb12, const int nb13, dpct::queue_ptr stream) { GGML_ASSERT(ne % QK4_0 == 0); @@ -10680,17 +10700,20 @@ static void ggml_cpy_f32_q4_0_sycl(const char *cx, char *cdst, const int ne, sycl::range<3>(1, 1, 1)), [=](sycl::nd_item<3> item_ct1) { cpy_f32_q( - cx, cdst, ne, ne00, ne01, nb00, nb01, nb02, - ne10, ne11, nb10, nb11, nb12, item_ct1); + cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, 
nb02, + nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, + item_ct1); }); } static void ggml_cpy_f32_q4_1_sycl(const char *cx, char *cdst, const int ne, const int ne00, const int ne01, - const int nb00, const int nb01, - const int nb02, const int ne10, - const int ne11, const int nb10, - const int nb11, const int nb12, + const int ne02, const int nb00, + const int nb01, const int nb02, + const int nb03, const int ne10, + const int ne11, const int ne12, + const int nb10, const int nb11, + const int nb12, const int nb13, dpct::queue_ptr stream) { GGML_ASSERT(ne % QK4_1 == 0); @@ -10699,17 +10722,20 @@ static void ggml_cpy_f32_q4_1_sycl(const char *cx, char *cdst, const int ne, sycl::range<3>(1, 1, 1)), [=](sycl::nd_item<3> item_ct1) { cpy_f32_q( - cx, cdst, ne, ne00, ne01, nb00, nb01, nb02, - ne10, ne11, nb10, nb11, nb12, item_ct1); + cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, + nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, + item_ct1); }); } static void ggml_cpy_f16_f16_sycl(const char *cx, char *cdst, const int ne, const int ne00, const int ne01, - const int nb00, const int nb01, - const int nb02, const int ne10, - const int ne11, const int nb10, - const int nb11, const int nb12, + const int ne02, const int nb00, + const int nb01, const int nb02, + const int nb03, const int ne10, + const int ne11, const int ne12, + const int nb10, const int nb11, + const int nb12, const int nb13, dpct::queue_ptr stream) { const int num_blocks = (ne + SYCL_CPY_BLOCK_SIZE - 1) / SYCL_CPY_BLOCK_SIZE; @@ -10722,8 +10748,8 @@ static void ggml_cpy_f16_f16_sycl(const char *cx, char *cdst, const int ne, sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE), sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)), [=](sycl::nd_item<3> item_ct1) { - cpy_f32_f16(cx, cdst, ne, ne00, ne01, nb00, nb01, - nb02, ne10, ne11, nb10, nb11, nb12, + cpy_f32_f16(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, + nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); }); } @@ -10731,10 +10757,12 @@ static void 
ggml_cpy_f16_f16_sycl(const char *cx, char *cdst, const int ne, static void ggml_cpy_i16_i16_sycl(const char *cx, char *cdst, const int ne, const int ne00, const int ne01, - const int nb00, const int nb01, - const int nb02, const int ne10, - const int ne11, const int nb10, - const int nb11, const int nb12, + const int ne02, const int nb00, + const int nb01, const int nb02, + const int nb03, const int ne10, + const int ne11, const int ne12, + const int nb10, const int nb11, + const int nb12, const int nb13, dpct::queue_ptr stream) { const int num_blocks = (ne + SYCL_CPY_BLOCK_SIZE - 1) / SYCL_CPY_BLOCK_SIZE; @@ -10747,8 +10775,8 @@ static void ggml_cpy_i16_i16_sycl(const char *cx, char *cdst, const int ne, sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE), sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)), [=](sycl::nd_item<3> item_ct1) { - cpy_f32_f16(cx, cdst, ne, ne00, ne01, nb00, nb01, - nb02, ne10, ne11, nb10, nb11, nb12, + cpy_f32_f16(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, + nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); }); } @@ -10756,10 +10784,12 @@ static void ggml_cpy_i16_i16_sycl(const char *cx, char *cdst, const int ne, static void ggml_cpy_i32_i32_sycl(const char *cx, char *cdst, const int ne, const int ne00, const int ne01, - const int nb00, const int nb01, - const int nb02, const int ne10, - const int ne11, const int nb10, - const int nb11, const int nb12, + const int ne02, const int nb00, + const int nb01, const int nb02, + const int nb03, const int ne10, + const int ne11, const int ne12, + const int nb10, const int nb11, + const int nb12, const int nb13, dpct::queue_ptr stream) { const int num_blocks = (ne + SYCL_CPY_BLOCK_SIZE - 1) / SYCL_CPY_BLOCK_SIZE; @@ -10772,8 +10802,8 @@ static void ggml_cpy_i32_i32_sycl(const char *cx, char *cdst, const int ne, sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE), sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)), [=](sycl::nd_item<3> item_ct1) { - cpy_f32_f16(cx, cdst, ne, ne00, ne01, nb00, nb01, - nb02, ne10, 
ne11, nb10, nb11, nb12, + cpy_f32_f16(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, + nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); }); } @@ -13910,19 +13940,23 @@ static void ggml_sycl_cpy(const ggml_tensor *src0, const ggml_tensor *src1, const int64_t ne00 = src0->ne[0]; const int64_t ne01 = src0->ne[1]; - GGML_ASSERT(src0->ne[3] == 1); + const int64_t ne02 = src0->ne[2]; + const int64_t nb00 = src0->nb[0]; const int64_t nb01 = src0->nb[1]; const int64_t nb02 = src0->nb[2]; + const int64_t nb03 = src0->nb[3]; const int64_t ne10 = src1->ne[0]; const int64_t ne11 = src1->ne[1]; - GGML_ASSERT(src1->ne[3] == 1); + const int64_t ne12 = src1->ne[2]; + const int64_t nb10 = src1->nb[0]; const int64_t nb11 = src1->nb[1]; const int64_t nb12 = src1->nb[2]; + const int64_t nb13 = src1->nb[3]; SYCL_CHECK(ggml_sycl_set_device(g_main_device)); dpct::queue_ptr main_stream = g_syclStreams[g_main_device_index][0]; @@ -13934,21 +13968,21 @@ static void ggml_sycl_cpy(const ggml_tensor *src0, const ggml_tensor *src1, char * src1_ddc = (char *) src1_extra->data_device[g_main_device_index]; if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32) { - ggml_cpy_f32_f32_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02, ne10, ne11, nb10, nb11, nb12, main_stream); + ggml_cpy_f32_f32_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream); } else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F16) { - ggml_cpy_f32_f16_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02, ne10, ne11, nb10, nb11, nb12, main_stream); + ggml_cpy_f32_f16_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream); } else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_Q8_0) { - ggml_cpy_f32_q8_0_sycl(src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02, ne10, ne11, nb10, nb11, nb12, main_stream); + 
ggml_cpy_f32_q8_0_sycl(src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream); } else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_Q4_0) { - ggml_cpy_f32_q4_0_sycl(src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02, ne10, ne11, nb10, nb11, nb12, main_stream); + ggml_cpy_f32_q4_0_sycl(src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream); } else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_Q4_1) { - ggml_cpy_f32_q4_1_sycl(src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02, ne10, ne11, nb10, nb11, nb12, main_stream); + ggml_cpy_f32_q4_1_sycl(src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream); } else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F16) { - ggml_cpy_f16_f16_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02, ne10, ne11, nb10, nb11, nb12, main_stream); + ggml_cpy_f16_f16_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream); } else if (src0->type == GGML_TYPE_I16 && src1->type == GGML_TYPE_I16) { - ggml_cpy_i16_i16_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02, ne10, ne11, nb10, nb11, nb12, main_stream); + ggml_cpy_i16_i16_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream); } else if (src0->type == GGML_TYPE_I32 && src1->type == GGML_TYPE_I32) { - ggml_cpy_i32_i32_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02, ne10, ne11, nb10, nb11, nb12, main_stream); + ggml_cpy_i32_i32_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream); } else { fprintf(stderr, "%s: unsupported type combination (%s to %s)\n", __func__, ggml_type_name(src0->type), 
ggml_type_name(src1->type)); From 5d55b0cd827bb0fcfedfa329a82bd5d6ef2c93ca Mon Sep 17 00:00:00 2001 From: chiranko <96988916+chiranko@users.noreply.github.com> Date: Mon, 5 Feb 2024 15:41:38 +0800 Subject: [PATCH 23/94] readme : add CodeShell models to the supported models list (#5330) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 4a9bdf314..a6fe34629 100644 --- a/README.md +++ b/README.md @@ -107,6 +107,7 @@ as the main playground for developing new features for the [ggml](https://github - [x] [Mixtral MoE](https://huggingface.co/models?search=mistral-ai/Mixtral) - [x] [PLaMo-13B](https://github.com/ggerganov/llama.cpp/pull/3557) - [x] [GPT-2](https://huggingface.co/gpt2) +- [x] [CodeShell](https://github.com/WisdomShell/codeshell) **Multimodal models:** From 4be04c8965578edc09194fab769b4b922b8444f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9D=D0=B8=D1=8F=D0=B7=20=D0=93=D0=B0=D1=80=D0=B8=D1=84?= =?UTF-8?q?=D0=B7=D1=8F=D0=BD=D0=BE=D0=B2?= <112617865+garrnizon@users.noreply.github.com> Date: Mon, 5 Feb 2024 10:43:57 +0300 Subject: [PATCH 24/94] scripts : add non-interactive server-llm.sh (#5303) * Update server-llm.sh Add flag --non-interactive that allows run script without asking a permission * Update scripts/server-llm.sh --------- Co-authored-by: Georgi Gerganov --- scripts/server-llm.sh | 73 ++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 33 deletions(-) diff --git a/scripts/server-llm.sh b/scripts/server-llm.sh index 0b83cdbbc..062b70496 100644 --- a/scripts/server-llm.sh +++ b/scripts/server-llm.sh @@ -47,6 +47,7 @@ if ! 
command -v make &> /dev/null; then fi # parse arguments +is_interactive=1 port=8888 repo="" wtype="" @@ -66,15 +67,16 @@ verbose=0 function print_usage { printf "Usage:\n" - printf " ./server-llm.sh [--port] [--repo] [--wtype] [--backend] [--gpu-id] [--n-parallel] [--n-kv] [--verbose]\n\n" - printf " --port: port number, default is 8888\n" - printf " --repo: path to a repo containing GGUF model files\n" - printf " --wtype: weights type (f16, q8_0, q4_0, q4_1), default is user-input\n" - printf " --backend: cpu, cuda, metal, opencl, depends on the OS\n" - printf " --gpu-id: gpu id, default is 0\n" - printf " --n-parallel: number of parallel requests, default is 8\n" - printf " --n-kv: KV cache size, default is 4096\n" - printf " --verbose: verbose output\n\n" + printf " ./server-llm.sh [-interactive] [--port] [--repo] [--wtype] [--backend] [--gpu-id] [--n-parallel] [--n-kv] [--verbose]\n\n" + printf " --non-interactive: run without asking a permision to run\n" + printf " --port: port number, default is 8888\n" + printf " --repo: path to a repo containing GGUF model files\n" + printf " --wtype: weights type (f16, q8_0, q4_0, q4_1), default is user-input\n" + printf " --backend: cpu, cuda, metal, opencl, depends on the OS\n" + printf " --gpu-id: gpu id, default is 0\n" + printf " --n-parallel: number of parallel requests, default is 8\n" + printf " --n-kv: KV cache size, default is 4096\n" + printf " --verbose: verbose output\n\n" printf "Example:\n\n" printf ' bash -c "$(curl -s https://ggml.ai/server-llm.sh)"\n\n' } @@ -82,6 +84,10 @@ function print_usage { while [[ $# -gt 0 ]]; do key="$1" case $key in + --non-interactive) + is_interactive=0 + shift + ;; --port) port="$2" shift @@ -176,31 +182,32 @@ repos=( "https://huggingface.co/TheBloke/OpenHermes-2-Mistral-7B-GGUF" "https://huggingface.co/TheBloke/CausalLM-7B-GGUF" ) +if [ $is_interactive -eq 1 ]; then + printf "\n" + printf "[I] This is a helper script for deploying llama.cpp's server on this machine.\n\n" + 
printf " Based on the options that follow, the script might download a model file\n" + printf " from the internet, which can be a few GBs in size. The script will also\n" + printf " build the latest llama.cpp source code from GitHub, which can be unstable.\n" + printf "\n" + printf " Upon success, an HTTP server will be started and it will serve the selected\n" + printf " model using llama.cpp for demonstration purposes.\n" + printf "\n" + printf " Please note:\n" + printf "\n" + printf " - All new data will be stored in the current folder\n" + printf " - The server will be listening on all network interfaces\n" + printf " - The server will run with default settings which are not always optimal\n" + printf " - Do not judge the quality of a model based on the results from this script\n" + printf " - Do not use this script to benchmark llama.cpp\n" + printf " - Do not use this script in production\n" + printf " - This script is only for demonstration purposes\n" + printf "\n" + printf " If you don't know what you are doing, please press Ctrl-C to abort now\n" + printf "\n" + printf " Press Enter to continue ...\n\n" -printf "\n" -printf "[I] This is a helper script for deploying llama.cpp's server on this machine.\n\n" -printf " Based on the options that follow, the script might download a model file\n" -printf " from the internet, which can be a few GBs in size. 
The script will also\n" -printf " build the latest llama.cpp source code from GitHub, which can be unstable.\n" -printf "\n" -printf " Upon success, an HTTP server will be started and it will serve the selected\n" -printf " model using llama.cpp for demonstration purposes.\n" -printf "\n" -printf " Please note:\n" -printf "\n" -printf " - All new data will be stored in the current folder\n" -printf " - The server will be listening on all network interfaces\n" -printf " - The server will run with default settings which are not always optimal\n" -printf " - Do not judge the quality of a model based on the results from this script\n" -printf " - Do not use this script to benchmark llama.cpp\n" -printf " - Do not use this script in production\n" -printf " - This script is only for demonstration purposes\n" -printf "\n" -printf " If you don't know what you are doing, please press Ctrl-C to abort now\n" -printf "\n" -printf " Press Enter to continue ...\n\n" - -read + read +fi if [[ -z "$repo" ]]; then printf "[+] No repo provided from the command line\n" From 30679d438d5225b3aecf5cec6482cbc9f8f87ba5 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 5 Feb 2024 09:48:03 +0200 Subject: [PATCH 25/94] scripts : fix typos, cleanup (#5303) --- scripts/server-llm.sh | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/scripts/server-llm.sh b/scripts/server-llm.sh index 062b70496..30bbac321 100644 --- a/scripts/server-llm.sh +++ b/scripts/server-llm.sh @@ -14,16 +14,17 @@ # - Might be unstable! 
# # Usage: -# ./server-llm.sh [--port] [--repo] [--wtype] [--backend] [--gpu-id] [--n-parallel] [--n-kv] [--verbose] +# ./server-llm.sh [--port] [--repo] [--wtype] [--backend] [--gpu-id] [--n-parallel] [--n-kv] [--verbose] [-non-interactive] # -# --port: port number, default is 8888 -# --repo: path to a repo containing GGUF model files -# --wtype: weights type (f16, q8_0, q4_0, q4_1), default is user-input -# --backend: cpu, cuda, metal, opencl, depends on the OS -# --gpu-id: gpu id, default is 0 -# --n-parallel: number of parallel requests, default is 8 -# --n-kv: KV cache size, default is 4096 -# --verbose: verbose output +# --port: port number, default is 8888 +# --repo: path to a repo containing GGUF model files +# --wtype: weights type (f16, q8_0, q4_0, q4_1), default is user-input +# --backend: cpu, cuda, metal, opencl, depends on the OS +# --gpu-id: gpu id, default is 0 +# --n-parallel: number of parallel requests, default is 8 +# --n-kv: KV cache size, default is 4096 +# --verbose: verbose output +# --non-interactive: run without asking a permission to run # # Example: # @@ -67,8 +68,7 @@ verbose=0 function print_usage { printf "Usage:\n" - printf " ./server-llm.sh [-interactive] [--port] [--repo] [--wtype] [--backend] [--gpu-id] [--n-parallel] [--n-kv] [--verbose]\n\n" - printf " --non-interactive: run without asking a permision to run\n" + printf " ./server-llm.sh [--port] [--repo] [--wtype] [--backend] [--gpu-id] [--n-parallel] [--n-kv] [--verbose] [-non-interactive]\n\n" printf " --port: port number, default is 8888\n" printf " --repo: path to a repo containing GGUF model files\n" printf " --wtype: weights type (f16, q8_0, q4_0, q4_1), default is user-input\n" @@ -77,6 +77,7 @@ function print_usage { printf " --n-parallel: number of parallel requests, default is 8\n" printf " --n-kv: KV cache size, default is 4096\n" printf " --verbose: verbose output\n\n" + printf " --non-interactive: run without asking a permission to run\n" printf "Example:\n\n" 
printf ' bash -c "$(curl -s https://ggml.ai/server-llm.sh)"\n\n' } From e6f81775323f6f4e4a30abf022a6028fa86b79ac Mon Sep 17 00:00:00 2001 From: l3utterfly Date: Mon, 5 Feb 2024 17:00:47 +0900 Subject: [PATCH 26/94] common : add dynamic temperature parameters to main example cli (#5295) * added dynamic temp params in main * added help text --- common/common.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/common/common.cpp b/common/common.cpp index 3302caa20..8c1a60583 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -399,6 +399,18 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { break; } sparams.penalty_present = std::stof(argv[i]); + } else if (arg == "--dynatemp-range") { + if (++i >= argc) { + invalid_param = true; + break; + } + sparams.dynatemp_range = std::stof(argv[i]); + } else if (arg == "--dynatemp-exp") { + if (++i >= argc) { + invalid_param = true; + break; + } + sparams.dynatemp_exponent = std::stof(argv[i]); } else if (arg == "--mirostat") { if (++i >= argc) { invalid_param = true; @@ -942,6 +954,8 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" --repeat-penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)sparams.penalty_repeat); printf(" --presence-penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)sparams.penalty_present); printf(" --frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)sparams.penalty_freq); + printf(" --dynatemp-range N dynamic temperature range (default: %.1f, 0.0 = disabled)\n", (double)sparams.dynatemp_range); + printf(" --dynatemp-exp N dynamic temperature exponent (default: %.1f)\n", (double)sparams.dynatemp_exponent); printf(" --mirostat N use Mirostat sampling.\n"); printf(" Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n"); printf(" (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 
2.0)\n", sparams.mirostat); From a2d60c9158435ae9a6f14632f07f1acf7a3becef Mon Sep 17 00:00:00 2001 From: Alexey Parfenov Date: Mon, 5 Feb 2024 08:10:22 +0000 Subject: [PATCH 27/94] server : allow to get default generation settings for completion (#5307) --- examples/server/README.md | 16 +++++++++++++++- examples/server/server.cpp | 7 ++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/examples/server/README.md b/examples/server/README.md index fe934dab1..d8e7c313e 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -264,7 +264,21 @@ Notice that each `probs` is an array of length `n_probs`. It also accepts all the options of `/completion` except `stream` and `prompt`. -- **GET** `/props`: Return the required assistant name and anti-prompt to generate the prompt in case you have specified a system prompt for all slots. +- **GET** `/props`: Return current server settings. + +### Result JSON + +```json +{ + "assistant_name": "", + "user_name": "", + "default_generation_settings": { ... } +} +``` + +- `assistant_name` - the required assistant name to generate the prompt in case you have specified a system prompt for all slots. +- `user_name` - the required anti-prompt to generate the prompt in case you have specified a system prompt for all slots. +- `default_generation_settings` - the default generation settings for the `/completion` endpoint, has the same fields as the `generation_settings` response object from the `/completion` endpoint. - **POST** `/v1/chat/completions`: OpenAI-compatible Chat Completions API. Given a ChatML-formatted json description in `messages`, it returns the predicted completion. Both synchronous and streaming mode are supported, so scripted and interactive applications work fine. While no strong claims of compatibility with OpenAI API spec is being made, in our experience it suffices to support many apps. 
Only ChatML-tuned models, such as Dolphin, OpenOrca, OpenHermes, OpenChat-3.5, etc can be used with this endpoint. Compared to `api_like_OAI.py` this API implementation does not require a wrapper to be served. diff --git a/examples/server/server.cpp b/examples/server/server.cpp index a9f8cb369..8000fee5c 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -334,6 +334,7 @@ struct llama_server_context // slots / clients std::vector slots; + json default_generation_settings_for_props; llama_server_queue queue_tasks; llama_server_response queue_results; @@ -430,6 +431,9 @@ struct llama_server_context slots.push_back(slot); } + default_generation_settings_for_props = get_formated_generation(slots.front()); + default_generation_settings_for_props["seed"] = -1; + batch = llama_batch_init(n_ctx, 0, params.n_parallel); // empty system prompt @@ -2614,7 +2618,8 @@ int main(int argc, char **argv) res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin")); json data = { { "user_name", llama.name_user.c_str() }, - { "assistant_name", llama.name_assistant.c_str() } + { "assistant_name", llama.name_assistant.c_str() }, + { "default_generation_settings", llama.default_generation_settings_for_props } }; res.set_content(data.dump(), "application/json; charset=utf-8"); }); From 6fdfa2ecc684000a25a4ad91823bc82a6652b645 Mon Sep 17 00:00:00 2001 From: Kawrakow <48489457+ikawrakow@users.noreply.github.com> Date: Mon, 5 Feb 2024 10:46:06 +0200 Subject: [PATCH 28/94] iq2_xxs: tune quantization (#5320) We get slightly better PPL, and we cut quantization time in nearly half. The trick is to 1st quantize without forcing points onto the E8-lattice. We can then use a narrower search range around the block scale that we got that way. 
Co-authored-by: Iwan Kawrakow --- ggml-quants.c | 58 ++++++--------------------------------------------- 1 file changed, 6 insertions(+), 52 deletions(-) diff --git a/ggml-quants.c b/ggml-quants.c index 8236385bc..014c0525a 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -9048,8 +9048,6 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict int8_t L[32]; int8_t Laux[32]; float waux[32]; - bool is_on_grid[4]; - bool is_on_grid_aux[4]; uint8_t block_signs[4]; uint32_t q2[2*(QK_K/32)]; @@ -9099,10 +9097,11 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict memset(L, 0, 32); continue; } + float scale = make_qp_quants(32, kMaxQ+1, xval, (uint8_t*)L, weight); + float eff_max = scale*kMaxQ; float best = 0; - float scale = max/(2*kMaxQ-1); - for (int is = -9; is <= 9; ++is) { - float id = (2*kMaxQ-1+is*0.1f)/max; + for (int is = -6; is <= 6; ++is) { + float id = (2*kMaxQ-1+is*0.1f)/eff_max; float this_scale = 1/id; for (int k = 0; k < 4; ++k) { for (int i = 0; i < 8; ++i) { @@ -9112,9 +9111,7 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict uint16_t u = 0; for (int i = 0; i < 8; ++i) u |= (Laux[8*k+i] << 2*i); int grid_index = kmap_q2xs[u]; - is_on_grid_aux[k] = true; if (grid_index < 0) { - is_on_grid_aux[k] = false; const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1; grid_index = iq2_find_best_neighbour(neighbours, kgrid_q2xs, xval + 8*k, waux + 8*k, this_scale, Laux + 8*k); } @@ -9128,16 +9125,12 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict } if (sumq2 > 0 && sumqx*sumqx > best*sumq2) { scale = sumqx/sumq2; best = scale*sumqx; - for (int i = 0; i < 32; ++i) L[i] = Laux[i]; - for (int k = 0; k < 4; ++k) is_on_grid[k] = is_on_grid_aux[k]; + memcpy(L, Laux, 32); } } - int n_not_ongrid = 0; - for (int k = 0; k < 4; ++k) if (!is_on_grid[k]) ++n_not_ongrid; - if (n_not_ongrid > 0 && scale > 0) { + if (scale > 0) { float id = 
1/scale; for (int k = 0; k < 4; ++k) { - if (is_on_grid[k]) continue; uint16_t u = 0; for (int i = 0; i < 8; ++i) { int l = nearest_int(0.5f*(id*xval[8*k+i]-1)); @@ -9193,49 +9186,10 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict float d = max_scale/31; y[ibl].d = GGML_FP32_TO_FP16(d); float id = 1/d; - float sumqx = 0, sumq2 = 0; for (int ib = 0; ib < QK_K/32; ++ib) { int l = nearest_int(0.5f*(id*scales[ib]-1)); l = MAX(0, MIN(15, l)); q2[2*ib+1] |= ((uint32_t)l << 28); - const float * xb = xbl + 32*ib; - const float * qw = quant_weights + QK_K*ibl + 32*ib; - for (int i = 0; i < 32; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]); - const uint8_t * aux8 = (const uint8_t *)(q2 + 2*ib); - const float db = d * (1 + 2*l); - uint32_t u = 0; - for (int k = 0; k < 4; ++k) { - const int8_t * signs = keven_signs_q2xs + 8*((q2[2*ib+1] >> 7*k) & 127); - const float * xk = xb + 8*k; - const float * wk = weight + 8*k; - const uint8_t * grid = (const uint8_t *)(kgrid_q2xs + aux8[k]); - float best_mse = 0; int best_index = aux8[k]; - for (int j = 0; j < 8; ++j) { - float diff = db * grid[j] * signs[j] - xk[j]; - best_mse += wk[j] * diff * diff; - } - for (int idx = 0; idx < 256; ++idx) { - grid = (const uint8_t *)(kgrid_q2xs + idx); - float mse = 0; - for (int j = 0; j < 8; ++j) { - float diff = db * grid[j] * signs[j] - xk[j]; - mse += wk[j] * diff * diff; - } - if (mse < best_mse) { - best_mse = mse; best_index = idx; - } - } - u |= (best_index << 8*k); - grid = (const uint8_t *)(kgrid_q2xs + best_index); - //grid = (const uint8_t *)(kgrid_q2xs + aux8[k]); - for (int j = 0; j < 8; ++j) { - float q = db * grid[j] * signs[j]; - sumqx += wk[j] * q * xk[j]; - sumq2 += wk[j] * q * q; - } - } - q2[2*ib] = u; - if (sumq2 > 0) y[ibl].d = GGML_FP32_TO_FP16(d*sumqx/sumq2); } memcpy(y[ibl].qs, q2, QK_K/4); } From 7e1ae372f36d98fa66b1d778c5862904b4d80c88 Mon Sep 17 00:00:00 2001 From: Guoteng <32697156+SolenoidWGT@users.noreply.github.com> Date: 
Mon, 5 Feb 2024 17:04:06 +0800 Subject: [PATCH 29/94] py : fix internlm2-hf convert to gguf (#5305) * py : fix internlm2-hf convert to gguf * ggml-ci --- convert-hf-to-gguf.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py index a6ffd128b..5e343742d 100755 --- a/convert-hf-to-gguf.py +++ b/convert-hf-to-gguf.py @@ -1416,8 +1416,32 @@ class InternLM2Model(Model): self.gguf_writer.add_add_space_prefix(add_prefix) special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens)) + old_eos = special_vocab.special_token_ids["eos"] + if "chat" in os.path.basename(self.dir_model.absolute()): + # For the chat model, we replace the eos with '<|im_end|>'. + special_vocab.special_token_ids["eos"] = self._try_get_sft_eos(tokenizer) + print(f"Replace eos:{old_eos} with a special token:{special_vocab.special_token_ids['eos']} \ +in chat mode so that the conversation can end normally.") + special_vocab.add_to_gguf(self.gguf_writer) + def _try_get_sft_eos(self, tokenizer): + unused_145_list = tokenizer.encode('[UNUSED_TOKEN_145]') + im_end_list = tokenizer.encode('<|im_end|>') + assert (len(unused_145_list) == 1) ^ (len(im_end_list) == 1) + if len(unused_145_list) == 1: + eos_token = unused_145_list[0] + if len(im_end_list) == 1: + eos_token = im_end_list[0] + return eos_token + + def _hf_permute_qk(self, weights, n_head: int, n_head_kv: int): + if n_head_kv is not None and n_head != n_head_kv: + n_head = n_head_kv + return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:]) + .swapaxes(1, 2) + .reshape(weights.shape)) + def set_gguf_parameters(self): self.gguf_writer.add_name("InternLM2") self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"]) @@ -1486,8 +1510,9 @@ class InternLM2Model(Model): qkv = data_torch qkv = rearrange(qkv.T, " o (g n i) ->o g n i", g=num_groups, n=q_per_kv + 2, i=head_dim) q, k, v = qkv[..., : q_per_kv, :], 
qkv[..., q_per_kv: q_per_kv + 1, :], qkv[..., q_per_kv + 1: q_per_kv + 2, :] - q = rearrange(q, " o g n i -> o (g n i)").T - k = rearrange(k, " o g n i -> o (g n i)").T + # The model weights of q and k equire additional reshape. + q = self._hf_permute_qk(rearrange(q, " o g n i -> o (g n i)").T, num_heads, num_heads) + k = self._hf_permute_qk(rearrange(k, " o g n i -> o (g n i)").T, num_heads, num_kv_heads) v = rearrange(v, " o g n i -> o (g n i)").T self.post_write_tensors(tensor_map, f"model.layers.{bid}.attention.wq.weight", q) self.post_write_tensors(tensor_map, f"model.layers.{bid}.attention.wk.weight", k) From 89503dcb5f764a5cc7093db1f395f5121876a2cc Mon Sep 17 00:00:00 2001 From: Kawrakow <48489457+ikawrakow@users.noreply.github.com> Date: Mon, 5 Feb 2024 12:32:27 +0200 Subject: [PATCH 30/94] iq3_xxs: quards for the no-imatrix situation (#5334) Co-authored-by: Iwan Kawrakow --- llama.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llama.cpp b/llama.cpp index 4787a92fe..65e399adc 100644 --- a/llama.cpp +++ b/llama.cpp @@ -9456,8 +9456,8 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && qs.model.hparams.n_gqa() >= 4) { new_type = GGML_TYPE_Q4_K; } - else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS && qs.model.hparams.n_gqa() >= 4) { - new_type = GGML_TYPE_Q4_K; + else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) { + new_type = qs.model.hparams.n_gqa() >= 4 ? GGML_TYPE_Q4_K : !qs.has_imatrix ? GGML_TYPE_Q3_K : GGML_TYPE_IQ3_XXS; } else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) { new_type = qs.i_attention_wv < 2 ? 
GGML_TYPE_Q5_K : GGML_TYPE_Q4_K; @@ -9496,9 +9496,9 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_XS) { if (i_layer < n_layer/8) new_type = GGML_TYPE_Q4_K; } - //else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) { - // if (i_layer < n_layer/8) new_type = GGML_TYPE_Q5_K; - //} + else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS && !qs.has_imatrix) { + new_type = i_layer < n_layer/8 ? GGML_TYPE_Q4_K : GGML_TYPE_Q3_K; + } else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) { new_type = i_layer < n_layer/16 ? GGML_TYPE_Q5_K : arch != LLM_ARCH_FALCON || use_more_bits(i_layer, n_layer) ? GGML_TYPE_Q4_K From abb61944a5f64dec62c893ed0db10790169b672a Mon Sep 17 00:00:00 2001 From: "Dr. Tom Murphy VII Ph.D" <499244+tom7@users.noreply.github.com> Date: Mon, 5 Feb 2024 06:13:57 -0500 Subject: [PATCH 31/94] ggml : avoid duplicating function calls using MIN/MAX macros (#5325) * Avoid duplicating function calls when using MIN/MAX macros. Since these copy "a" and "b" they ask the compiler to evaluate one of them twice. The compiler doesn't have a problem with removing the duplication in something like MAX(0, x + 2), but in some cases we're calling functions, and those calls just happen twice. By explicitly evaluating at the expression we get smaller and faster code without duplicate calls. See ggml_rope_yarn_corr_dims in Compiler Explorer: https://godbolt.org/z/Ee4KMrvKh Code behaves exactly the same. 
* Update ggml.c --------- Co-authored-by: Georgi Gerganov --- ggml.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ggml.c b/ggml.c index ee994c875..b9ec0c981 100644 --- a/ggml.c +++ b/ggml.c @@ -2470,7 +2470,8 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) { size_t max_size = 0; for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) { - max_size = MAX(max_size, ggml_nbytes(tensor)); + size_t bytes = ggml_nbytes(tensor); + max_size = MAX(max_size, bytes); } return max_size; @@ -11887,8 +11888,10 @@ GGML_CALL void ggml_rope_yarn_corr_dims( int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2] ) { // start and end correction dims - dims[0] = MAX(0, floorf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_fast, freq_base))); - dims[1] = MIN(n_dims - 1, ceilf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_slow, freq_base))); + float start = floorf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_fast, freq_base)); + float end = ceilf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_slow, freq_base)); + dims[0] = MAX(0, start); + dims[1] = MIN(n_dims - 1, end); } static void ggml_compute_forward_rope_f32( From c6b395535a6874d749ef47c33eacd466cb252cd5 Mon Sep 17 00:00:00 2001 From: Kawrakow <48489457+ikawrakow@users.noreply.github.com> Date: Mon, 5 Feb 2024 14:09:47 +0200 Subject: [PATCH 32/94] ggml : make use of ggml-quants.h possible in C++ code (#5338) * Make use of ggml-quants.h possible in C++ code * One cannot possibly be defining static_assert in a C++ compilation --------- Co-authored-by: Iwan Kawrakow --- ggml-impl.h | 2 + ggml-quants.h | 117 +++++++++++++++++++++++++++----------------------- 2 files changed, 65 insertions(+), 54 deletions(-) diff --git a/ggml-impl.h b/ggml-impl.h index 2c58075ac..19df66bce 100644 --- a/ggml-impl.h +++ b/ggml-impl.h @@ -19,6 +19,7 @@ extern "C" { // fall back to the 
_Static_assert C11 keyword. // if C99 - static_assert is noop // ref: https://stackoverflow.com/a/53923785/4039976 +#ifndef __cplusplus #ifndef static_assert #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L) #define static_assert(cond, msg) _Static_assert(cond, msg) @@ -26,6 +27,7 @@ extern "C" { #define static_assert(cond, msg) struct global_scope_noop_trick #endif #endif +#endif // __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512 #if defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__)) diff --git a/ggml-quants.h b/ggml-quants.h index 5c9f63bd9..bfdf3c997 100644 --- a/ggml-quants.h +++ b/ggml-quants.h @@ -191,70 +191,74 @@ typedef struct { } block_iq3_xxs; static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_fp16_t) + 3*(QK_K/8), "wrong iq3_xxs block size/padding"); +#ifdef __cplusplus +extern "C" { +#endif + // Quantization -void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict y, int k); -void quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict y, int k); -void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict y, int k); -void quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict y, int k); -void quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict y, int k); -void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict y, int k); +void quantize_row_q4_0_reference(const float * GGML_RESTRICT x, block_q4_0 * GGML_RESTRICT y, int k); +void quantize_row_q4_1_reference(const float * GGML_RESTRICT x, block_q4_1 * GGML_RESTRICT y, int k); +void quantize_row_q5_0_reference(const float * GGML_RESTRICT x, block_q5_0 * GGML_RESTRICT y, int k); +void quantize_row_q5_1_reference(const float * GGML_RESTRICT x, block_q5_1 * GGML_RESTRICT y, int k); +void quantize_row_q8_0_reference(const float * GGML_RESTRICT x, block_q8_0 * GGML_RESTRICT y, int k); +void 
quantize_row_q8_1_reference(const float * GGML_RESTRICT x, block_q8_1 * GGML_RESTRICT y, int k); -void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict y, int k); -void quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict y, int k); -void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict y, int k); -void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict y, int k); -void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict y, int k); -void quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict y, int k); -void quantize_row_iq3_xxs_reference(const float * restrict x, block_iq3_xxs * restrict y, int k); +void quantize_row_q2_K_reference(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int k); +void quantize_row_q3_K_reference(const float * GGML_RESTRICT x, block_q3_K * GGML_RESTRICT y, int k); +void quantize_row_q4_K_reference(const float * GGML_RESTRICT x, block_q4_K * GGML_RESTRICT y, int k); +void quantize_row_q5_K_reference(const float * GGML_RESTRICT x, block_q5_K * GGML_RESTRICT y, int k); +void quantize_row_q6_K_reference(const float * GGML_RESTRICT x, block_q6_K * GGML_RESTRICT y, int k); +void quantize_row_q8_K_reference(const float * GGML_RESTRICT x, block_q8_K * GGML_RESTRICT y, int k); +void quantize_row_iq3_xxs_reference(const float * GGML_RESTRICT x, block_iq3_xxs * GGML_RESTRICT y, int k); -void quantize_row_q4_0(const float * restrict x, void * restrict y, int k); -void quantize_row_q4_1(const float * restrict x, void * restrict y, int k); -void quantize_row_q5_0(const float * restrict x, void * restrict y, int k); -void quantize_row_q5_1(const float * restrict x, void * restrict y, int k); -void quantize_row_q8_0(const float * restrict x, void * restrict y, int k); -void quantize_row_q8_1(const float * restrict x, void * restrict y, int k); +void quantize_row_q4_0(const float * GGML_RESTRICT x, 
void * GGML_RESTRICT y, int k); +void quantize_row_q4_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k); +void quantize_row_q5_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k); +void quantize_row_q5_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k); +void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k); +void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k); -void quantize_row_q2_K(const float * restrict x, void * restrict y, int k); -void quantize_row_q3_K(const float * restrict x, void * restrict y, int k); -void quantize_row_q4_K(const float * restrict x, void * restrict y, int k); -void quantize_row_q5_K(const float * restrict x, void * restrict y, int k); -void quantize_row_q6_K(const float * restrict x, void * restrict y, int k); -void quantize_row_q8_K(const float * restrict x, void * restrict y, int k); -void quantize_row_iq3_xxs(const float * restrict x, void * restrict y, int k); +void quantize_row_q2_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k); +void quantize_row_q3_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k); +void quantize_row_q4_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k); +void quantize_row_q5_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k); +void quantize_row_q6_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k); +void quantize_row_q8_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k); +void quantize_row_iq3_xxs(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k); // Dequantization -void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int k); -void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k); -void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int k); -void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int k); -void 
dequantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y, int k); -//void dequantize_row_q8_1(const block_q8_1 * restrict x, float * restrict y, int k); +void dequantize_row_q4_0(const block_q4_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int k); +void dequantize_row_q4_1(const block_q4_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int k); +void dequantize_row_q5_0(const block_q5_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int k); +void dequantize_row_q5_1(const block_q5_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int k); +void dequantize_row_q8_0(const block_q8_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int k); +//void dequantize_row_q8_1(const block_q8_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int k); -void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int k); -void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k); -void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int k); -void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int k); -void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int k); -void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int k); -void dequantize_row_iq2_xxs(const block_iq2_xxs * restrict x, float * restrict y, int k); -void dequantize_row_iq2_xs (const block_iq2_xs * restrict x, float * restrict y, int k); -void dequantize_row_iq3_xxs(const block_iq3_xxs * restrict x, float * restrict y, int k); +void dequantize_row_q2_K(const block_q2_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int k); +void dequantize_row_q3_K(const block_q3_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int k); +void dequantize_row_q4_K(const block_q4_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int k); +void dequantize_row_q5_K(const block_q5_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int k); +void dequantize_row_q6_K(const block_q6_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int k); 
+void dequantize_row_q8_K(const block_q8_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int k); +void dequantize_row_iq2_xxs(const block_iq2_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int k); +void dequantize_row_iq2_xs (const block_iq2_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int k); +void dequantize_row_iq3_xxs(const block_iq3_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int k); // Dot product -void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, const void * restrict vx, const void * restrict vy); -void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, const void * restrict vx, const void * restrict vy); -void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, const void * restrict vx, const void * restrict vy); -void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, const void * restrict vx, const void * restrict vy); -void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, const void * restrict vx, const void * restrict vy); +void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy); +void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy); +void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy); +void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy); +void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy); -void ggml_vec_dot_q2_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy); -void ggml_vec_dot_q3_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy); -void ggml_vec_dot_q4_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy); -void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * 
restrict vy); -void ggml_vec_dot_q6_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy); -void ggml_vec_dot_iq2_xxs_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy); -void ggml_vec_dot_iq2_xs_q8_K (int n, float * restrict s, const void * restrict vx, const void * restrict vy); -void ggml_vec_dot_iq3_xxs_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy); +void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy); +void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy); +void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy); +void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy); +void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy); +void ggml_vec_dot_iq2_xxs_q8_K(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy); +void ggml_vec_dot_iq2_xs_q8_K (int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy); +void ggml_vec_dot_iq3_xxs_q8_K(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy); // // Quantization utilizing an importance matrix (a.k.a. 
"Activation aWare Quantization") @@ -276,3 +280,8 @@ void iq2xs_init_impl(int grid_size); void iq2xs_free_impl(int grid_size); void iq3xs_init_impl(int grid_size); void iq3xs_free_impl(int grid_size); + +#ifdef __cplusplus +} +#endif + From 78b00dda6c0d62c34f5371d47718defff6ed2b22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Mon, 5 Feb 2024 15:55:10 +0100 Subject: [PATCH 33/94] README: updated introduction (#5343) * README: updated introduction * readme : update --------- Co-authored-by: Georgi Gerganov --- README.md | 49 ++++++++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index a6fe34629..bb6c49338 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ [Roadmap](https://github.com/users/ggerganov/projects/7) / [Project status](https://github.com/ggerganov/llama.cpp/discussions/3471) / [Manifesto](https://github.com/ggerganov/llama.cpp/discussions/205) / [ggml](https://github.com/ggerganov/ggml) -Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++ +Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) in pure C/C++ ### Hot topics @@ -58,18 +58,20 @@ Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++ ## Description -The main goal of `llama.cpp` is to run the LLaMA model using 4-bit integer quantization on a MacBook +The main goal of `llama.cpp` is to enable LLM inference with minimal setup and state-of-the-art performance on a wide +variety of hardware - locally and in the cloud. 
-- Plain C/C++ implementation without dependencies -- Apple silicon first-class citizen - optimized via ARM NEON, Accelerate and Metal frameworks +- Plain C/C++ implementation without any dependencies +- Apple silicon is a first-class citizen - optimized via ARM NEON, Accelerate and Metal frameworks - AVX, AVX2 and AVX512 support for x86 architectures -- Mixed F16 / F32 precision -- 2-bit, 3-bit, 4-bit, 5-bit, 6-bit and 8-bit integer quantization support -- CUDA, Metal, OpenCL, SYCL GPU backend support +- 2-bit, 3-bit, 4-bit, 5-bit, 6-bit, and 8-bit integer quantization for faster inference and reduced memory use +- Custom CUDA kernels for running LLMs on NVIDIA GPUs (support for AMD GPUs via HIP) +- Vulkan, SYCL, and (partial) OpenCL backend support +- CPU+GPU hybrid inference to partially accelerate models larger than the total VRAM capacity -The original implementation of `llama.cpp` was [hacked in an evening](https://github.com/ggerganov/llama.cpp/issues/33#issuecomment-1465108022). -Since then, the project has improved significantly thanks to many contributions. This project is mainly for educational purposes and serves -as the main playground for developing new features for the [ggml](https://github.com/ggerganov/ggml) library. +Since its [inception](https://github.com/ggerganov/llama.cpp/issues/33#issuecomment-1465108022), the project has +improved significantly thanks to many contributions. It is the main playground for developing new features for the +[ggml](https://github.com/ggerganov/ggml) library. 
**Supported platforms:** @@ -77,11 +79,14 @@ as the main playground for developing new features for the [ggml](https://github - [X] Linux - [X] Windows (via CMake) - [X] Docker +- [X] FreeBSD **Supported models:** - [X] LLaMA 🦙 - [x] LLaMA 2 🦙🦙 +- [X] [Mistral AI v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) +- [x] [Mixtral MoE](https://huggingface.co/models?search=mistral-ai/Mixtral) - [X] Falcon - [X] [Alpaca](https://github.com/ggerganov/llama.cpp#instruction-mode-with-alpaca) - [X] [GPT4All](https://github.com/ggerganov/llama.cpp#using-gpt4all) @@ -95,7 +100,6 @@ as the main playground for developing new features for the [ggml](https://github - [X] [Baichuan 1 & 2](https://huggingface.co/models?search=baichuan-inc/Baichuan) + [derivations](https://huggingface.co/hiyouga/baichuan-7b-sft) - [X] [Aquila 1 & 2](https://huggingface.co/models?search=BAAI/Aquila) - [X] [Starcoder models](https://github.com/ggerganov/llama.cpp/pull/3187) -- [X] [Mistral AI v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) - [X] [Refact](https://huggingface.co/smallcloudai/Refact-1_6B-fim) - [X] [Persimmon 8B](https://github.com/ggerganov/llama.cpp/pull/3410) - [X] [MPT](https://github.com/ggerganov/llama.cpp/pull/3417) @@ -104,15 +108,14 @@ as the main playground for developing new features for the [ggml](https://github - [X] [StableLM-3b-4e1t](https://github.com/ggerganov/llama.cpp/pull/3586) - [x] [Deepseek models](https://huggingface.co/models?search=deepseek-ai/deepseek) - [x] [Qwen models](https://huggingface.co/models?search=Qwen/Qwen) -- [x] [Mixtral MoE](https://huggingface.co/models?search=mistral-ai/Mixtral) - [x] [PLaMo-13B](https://github.com/ggerganov/llama.cpp/pull/3557) - [x] [GPT-2](https://huggingface.co/gpt2) - [x] [CodeShell](https://github.com/WisdomShell/codeshell) **Multimodal models:** -- [x] [Llava 1.5 models](https://huggingface.co/collections/liuhaotian/llava-15-653aac15d994e992e2677a7e) -- [x] 
[Bakllava](https://huggingface.co/models?search=SkunkworksAI/Bakllava) +- [x] [LLaVA 1.5 models](https://huggingface.co/collections/liuhaotian/llava-15-653aac15d994e992e2677a7e) +- [x] [BakLLaVA](https://huggingface.co/models?search=SkunkworksAI/Bakllava) - [x] [Obsidian](https://huggingface.co/NousResearch/Obsidian-3B-V0.5) - [x] [ShareGPT4V](https://huggingface.co/models?search=Lin-Chen/ShareGPT4V) - [x] [MobileVLM 1.7B/3B models](https://huggingface.co/models?search=mobileVLM) @@ -137,14 +140,22 @@ as the main playground for developing new features for the [ggml](https://github **UI:** +Unless otherwise noted these projects are open-source with permissive licensing: + +- [iohub/collama](https://github.com/iohub/coLLaMA) +- [janhq/jan](https://github.com/janhq/jan) (AGPL) - [nat/openplayground](https://github.com/nat/openplayground) -- [oobabooga/text-generation-webui](https://github.com/oobabooga/text-generation-webui) -- [withcatai/catai](https://github.com/withcatai/catai) -- [semperai/amica](https://github.com/semperai/amica) +- [LMStudio](https://lmstudio.ai/) (proprietary) +- [LostRuins/koboldcpp](https://github.com/LostRuins/koboldcpp) (AGPL) +- [Mozilla-Ocho/llamafile](https://github.com/Mozilla-Ocho/llamafile) +- [nomic-ai/gpt4all](https://github.com/nomic-ai/gpt4all) +- [ollama/ollama](https://github.com/ollama/ollama) +- [oobabooga/text-generation-webui](https://github.com/oobabooga/text-generation-webui) (AGPL) - [psugihara/FreeChat](https://github.com/psugihara/FreeChat) - [ptsochantaris/emeltal](https://github.com/ptsochantaris/emeltal) -- [iohub/collama](https://github.com/iohub/coLLaMA) -- [pythops/tenere](https://github.com/pythops/tenere) +- [pythops/tenere](https://github.com/pythops/tenere) (AGPL) +- [semperai/amica](https://github.com/semperai/amica) +- [withcatai/catai](https://github.com/withcatai/catai) --- From 098f6d737b65134cf220d12b9b706e8cfc5e4610 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Mon, 5 Feb 
2024 19:33:00 +0100 Subject: [PATCH 34/94] make: Use ccache for faster compilation (#5318) * make: Use ccache for faster compilation --- CMakeLists.txt | 4 +- Makefile | 169 ++++++++++++++++++++++++++++++++++--------------- 2 files changed, 121 insertions(+), 52 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8c04e4c19..427015be5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -809,9 +809,9 @@ if (LLAMA_CCACHE) if (LLAMA_CCACHE_FOUND) set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache) set(ENV{CCACHE_SLOPPINESS} time_macros) - message(STATUS "Using ccache") + message(STATUS "ccache found, compilation results will be cached. Disable with LLAMA_CCACHE=OFF.") else() - message(STATUS "Warning: ccache not found - consider installing it or use LLAMA_CCACHE=OFF") + message(STATUS "Warning: ccache not found - consider installing it for faster compilation or disable this warning with LLAMA_CCACHE=OFF") endif () endif() diff --git a/Makefile b/Makefile index 21d5e15ba..ba73f0637 100644 --- a/Makefile +++ b/Makefile @@ -112,6 +112,18 @@ MK_CXXFLAGS += -O3 MK_NVCCFLAGS += -O3 endif +ifndef LLAMA_NO_CCACHE +CCACHE := $(shell which ccache) +ifdef CCACHE +export CCACHE_SLOPPINESS = time_macros +$(info I ccache found, compilation results will be cached. Disable with LLAMA_NO_CCACHE.) +CC := $(CCACHE) $(CC) +CXX := $(CCACHE) $(CXX) +else +$(info I ccache not found. Consider installing it for faster compilation.) 
+endif # CCACHE +endif # LLAMA_NO_CCACHE + # clock_gettime came in POSIX.1b (1993) # CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional # posix_memalign came in POSIX.1-2001 / SUSv3 @@ -374,9 +386,9 @@ ifdef LLAMA_DEBUG MK_NVCCFLAGS += -lineinfo endif # LLAMA_DEBUG ifdef LLAMA_CUDA_NVCC - NVCC = $(LLAMA_CUDA_NVCC) + NVCC = $(CCACHE) $(LLAMA_CUDA_NVCC) else - NVCC = nvcc + NVCC = $(CCACHE) nvcc endif #LLAMA_CUDA_NVCC ifdef CUDA_DOCKER_ARCH MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH) @@ -483,7 +495,7 @@ ifdef LLAMA_HIPBLAS ROCM_PATH ?= /opt/rocm GPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch) endif - HIPCC ?= $(ROCM_PATH)/bin/hipcc + HIPCC ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc LLAMA_CUDA_DMMV_X ?= 32 LLAMA_CUDA_MMV_Y ?= 1 LLAMA_CUDA_KQUANTS_ITER ?= 2 @@ -607,97 +619,135 @@ libllama.a: llama.o ggml.o $(OBJS) $(COMMON_DEPS) clean: rm -vrf *.o tests/*.o *.so *.a *.dll benchmark-matmult common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS) + find examples pocs -type f -name "*.o" -delete # # Examples # +# $< is the first prerequisite, i.e. the source file. +# Explicitly compile this to an object file so that it can be cached with ccache. +# The source file is then filtered out from $^ (the list of all prerequisites) and the object file is added instead. + +# Helper function that replaces .c, .cpp, and .cu file endings with .o: +GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1)))) + main: examples/main/main.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) @echo @echo '==== Run ./main -h for help. 
====' @echo infill: examples/infill/infill.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) simple: examples/simple/simple.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) tokenize: examples/tokenize/tokenize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) batched: examples/batched/batched.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o common.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) quantize: examples/quantize/quantize.cpp build-info.o ggml.o llama.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.o llama.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, 
$<) -o $@ $(LDFLAGS) perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) imatrix: examples/imatrix/imatrix.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) server: examples/server/server.cpp examples/server/oai.hpp examples/server/utils.hpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS) - $(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS) $(LWINSOCK2) -Wno-cast-qual + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual + $(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h %.hpp $< examples/llava/clip.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) -o $@ $(LDFLAGS) 
$(LWINSOCK2) gguf: examples/gguf/gguf.cpp ggml.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) llama-bench: examples/llama-bench/llama-bench.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) libllava.a: examples/llava/llava.cpp examples/llava/llava.h examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h common/base64.hpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) $(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -Wno-cast-qual + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual + $(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE, examples/llava/llava.cpp) + $(CXX) $(CXXFLAGS) $(filter-out %.h $< 
examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) $(call GET_OBJ_FILE, examples/llava/llava.cpp) -o $@ $(LDFLAGS) baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) beam-search: examples/beam-search/beam-search.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) finetune: examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) parallel: examples/parallel/parallel.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) lookahead: examples/lookahead/lookahead.cpp ggml.o llama.o 
$(COMMON_DEPS) $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) lookup: examples/lookup/lookup.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) passkey: examples/passkey/passkey.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) ifeq ($(UNAME_S),Darwin) swift: examples/batched.swift @@ -705,7 +755,7 @@ swift: examples/batched.swift endif common/build-info.cpp: $(wildcard .git/index) scripts/build-info.sh - @sh scripts/build-info.sh $(CC) > $@.tmp + @sh scripts/build-info.sh "$(CC)" > $@.tmp @if ! 
cmp -s $@.tmp $@; then \ mv $@.tmp $@; \ else \ @@ -722,7 +772,8 @@ build-info.o: common/build-info.cpp tests: $(TEST_TARGETS) benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.o ggml.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) run-benchmark-matmult: benchmark-matmult ./$@ @@ -730,58 +781,76 @@ run-benchmark-matmult: benchmark-matmult .PHONY: run-benchmark-matmult swift vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS) - $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS) - $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) tests/test-llama-grammar: tests/test-llama-grammar.cpp ggml.o grammar-parser.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) tests/test-grammar-parser: tests/test-grammar-parser.cpp ggml.o llama.o grammar-parser.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) tests/test-double-float: tests/test-double-float.cpp ggml.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) tests/test-grad0: tests/test-grad0.cpp ggml.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o 
$(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) tests/test-opt: tests/test-opt.cpp ggml.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) tests/test-quantize-fns: tests/test-quantize-fns.cpp ggml.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) tests/test-quantize-perf: tests/test-quantize-perf.cpp ggml.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) tests/test-sampling: tests/test-sampling.cpp ggml.o llama.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) tests/test-tokenizer-0-falcon: tests/test-tokenizer-0-falcon.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) tests/test-tokenizer-0-llama: tests/test-tokenizer-0-llama.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o 
$(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) tests/test-tokenizer-1-llama: tests/test-tokenizer-1-llama.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) tests/test-rope: tests/test-rope.cpp ggml.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) tests/test-c.o: tests/test-c.c llama.h $(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@ tests/test-backend-ops: tests/test-backend-ops.cpp ggml.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) tests/test-model-load-cancel: tests/test-model-load-cancel.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) From 906cff55c2848fda091d888a1585915ec0c9ea9e Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 6 Feb 2024 07:47:22 +0200 Subject: [PATCH 35/94] py : handle byte tokens in `get_token_type` (#5341) * py : handle byte tokens in `get_token_type` * py : fix empty bytes arg --- convert.py | 10 +++++++--- 1 file changed, 7 
insertions(+), 3 deletions(-) diff --git a/convert.py b/convert.py index 75c100118..4a2847a27 100755 --- a/convert.py +++ b/convert.py @@ -515,10 +515,14 @@ class HfVocab: # Yield token text, score, and type yield token_text, self.get_token_score(token_id), self.get_token_type( - token_id, self.special_ids # Reuse already stored special IDs + token_id, token_text, self.special_ids # Reuse already stored special IDs ) - def get_token_type(self, token_id: int, special_ids: set[int]) -> gguf.TokenType: + def get_token_type(self, token_id: int, token_text: bytes, special_ids: set[int]) -> gguf.TokenType: + # Special case for byte tokens + if re.fullmatch(br"<0x[0-9A-Fa-f]{2}>", token_text): + return gguf.TokenType.BYTE + # Determine token type based on whether it's a special token return gguf.TokenType.CONTROL if token_id in special_ids else gguf.TokenType.NORMAL @@ -530,7 +534,7 @@ class HfVocab: def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: for text in self.added_tokens_list: if text in self.specials: - toktype = self.get_token_type(self.specials[text], self.special_ids) + toktype = self.get_token_type(self.specials[text], b'', self.special_ids) score = self.get_token_score(self.specials[text]) else: toktype = gguf.TokenType.USER_DEFINED From 4ffc7a17d4e80c5f3f905139cb570ed9b6934fcb Mon Sep 17 00:00:00 2001 From: Niall Coates <1349685+Niall-@users.noreply.github.com> Date: Tue, 6 Feb 2024 08:16:23 +0000 Subject: [PATCH 36/94] server : various fixes for the prompt field in /completion (#5300) server : fix deadlock when prompt array contains strings and numbers server : removed an unnecessary generation when generating multi-prompts server : removed an unnecessary assert --- examples/server/server.cpp | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 8000fee5c..fc7e723a1 100644 --- a/examples/server/server.cpp +++ 
b/examples/server/server.cpp @@ -1163,13 +1163,30 @@ struct llama_server_context task.multitask_id = multitask_id; // when a completion task's prompt array is not a singleton, we split it into multiple requests - if (task.data.count("prompt") && task.data.at("prompt").size() > 1) - { - split_multiprompt_task(task_id, task); - } - // otherwise, it's a single-prompt task, we actually queue it - queue_tasks.post(task); + // if there's numbers in the prompt array it will be treated as an array of tokens + if (task.data.count("prompt") != 0 && task.data.at("prompt").size() > 1) { + bool numbers = false; + for (const auto& e : task.data.at("prompt")) { + if (e.is_number()) { + numbers = true; + break; + } + } + + // NOTE: split_multiprompt_task() does not handle a mix of strings and numbers, + // it will completely stall the server. I don't know where the bug for this is. + // + // if there are numbers, it needs to be treated like a single prompt, + // queue_tasks handles a mix of strings and numbers just fine. 
+ if (numbers) { + queue_tasks.post(task); + } else { + split_multiprompt_task(task_id, task); + } + } else { + queue_tasks.post(task); + } } // for multiple images processing @@ -1251,7 +1268,10 @@ struct llama_server_context void split_multiprompt_task(int multitask_id, task_server& multiprompt_task) { int prompt_count = multiprompt_task.data.at("prompt").size(); - assert(prompt_count > 1); + if (prompt_count <= 1) { + send_error(multiprompt_task, "error while handling multiple prompts"); + return; + } // generate all the ID for subtask std::vector subtask_ids(prompt_count); From 31e790322133a4b1d0684527ea446e765e8a96cf Mon Sep 17 00:00:00 2001 From: Michael Coppola Date: Tue, 6 Feb 2024 04:20:00 -0500 Subject: [PATCH 37/94] server : add `dynatemp_range` and `dynatemp_exponent` (#5352) * server: added `dynatemp_range` and `dynatemp_exponent` * Update README.md --------- Co-authored-by: Michael Coppola --- examples/server/README.md | 4 ++++ examples/server/server.cpp | 46 +++++++++++++++++++++----------------- 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/examples/server/README.md b/examples/server/README.md index d8e7c313e..46d8f85ae 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -137,6 +137,10 @@ node index.js `temperature`: Adjust the randomness of the generated text (default: 0.8). + `dynatemp_range`: Dynamic temperature range (default: 0.0, 0.0 = disabled). + + `dynatemp_exponent`: Dynamic temperature exponent (default: 1.0). + `top_k`: Limit the next token selection to the K most probable tokens (default: 40). `top_p`: Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P (default: 0.95). 
diff --git a/examples/server/server.cpp b/examples/server/server.cpp index fc7e723a1..e48a1da75 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -524,27 +524,29 @@ struct llama_server_context slot->oaicompat_model = ""; } - slot->params.stream = json_value(data, "stream", false); - slot->params.cache_prompt = json_value(data, "cache_prompt", false); - slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict); - slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k); - slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p); - slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p); - slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z); - slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p); - slot->sparams.temp = json_value(data, "temperature", default_sparams.temp); - slot->sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n); - slot->sparams.penalty_repeat = json_value(data, "repeat_penalty", default_sparams.penalty_repeat); - slot->sparams.penalty_freq = json_value(data, "frequency_penalty", default_sparams.penalty_freq); - slot->sparams.penalty_present = json_value(data, "presence_penalty", default_sparams.penalty_present); - slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat); - slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau); - slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta); - slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl); - slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep); - slot->params.seed = json_value(data, "seed", default_params.seed); - slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar); - slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs); + 
slot->params.stream = json_value(data, "stream", false); + slot->params.cache_prompt = json_value(data, "cache_prompt", false); + slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict); + slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k); + slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p); + slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p); + slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z); + slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p); + slot->sparams.temp = json_value(data, "temperature", default_sparams.temp); + slot->sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range); + slot->sparams.dynatemp_exponent = json_value(data, "dynatemp_exponent", default_sparams.dynatemp_exponent); + slot->sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n); + slot->sparams.penalty_repeat = json_value(data, "repeat_penalty", default_sparams.penalty_repeat); + slot->sparams.penalty_freq = json_value(data, "frequency_penalty", default_sparams.penalty_freq); + slot->sparams.penalty_present = json_value(data, "presence_penalty", default_sparams.penalty_present); + slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat); + slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau); + slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta); + slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl); + slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep); + slot->params.seed = json_value(data, "seed", default_params.seed); + slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar); + slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs); // infill if (data.count("input_prefix") != 
0) @@ -1002,6 +1004,8 @@ struct llama_server_context {"model", params.model_alias}, {"seed", slot.params.seed}, {"temperature", slot.sparams.temp}, + {"dynatemp_range", slot.sparams.dynatemp_range}, + {"dynatemp_exponent", slot.sparams.dynatemp_exponent}, {"top_k", slot.sparams.top_k}, {"top_p", slot.sparams.top_p}, {"min_p", slot.sparams.min_p}, From 8a79c591de9b7ff3242a94f68b7fb5a17ed8c2be Mon Sep 17 00:00:00 2001 From: Justin Parker Date: Tue, 6 Feb 2024 04:20:59 -0500 Subject: [PATCH 38/94] server : include total "num_slots" in props endpoint (#5349) --- examples/server/server.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index e48a1da75..d86d7e04a 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -432,6 +432,7 @@ struct llama_server_context } default_generation_settings_for_props = get_formated_generation(slots.front()); + default_generation_settings_for_props["num_slots"] = params.n_parallel; default_generation_settings_for_props["seed"] = -1; batch = llama_batch_init(n_ctx, 0, params.n_parallel); From 2c516611f1d0f1e5e9754f8ea1cf97cb1b17bf2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Tue, 6 Feb 2024 14:44:06 +0100 Subject: [PATCH 39/94] CUDA: mul_mat_vec_q for batch sizes > 1 (#5351) --- ggml-cuda.cu | 240 +++++++++++++++++++++------------------------------ 1 file changed, 98 insertions(+), 142 deletions(-) diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 3242a0b4a..95161b3f4 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -5310,41 +5310,50 @@ template static __global__ void #endif // __CUDA_ARCH__ >= CC_VOLTA } -template -static __global__ void mul_mat_vec_q(const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst, const int ncols, const int nrows) { +template +static __global__ void mul_mat_vec_q( + const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst, + const int ncols_x, 
const int nrows_x, const int nrows_y, const int ncols_y_par) { + + const int ncols_y = ncols_y_template != 0 ? ncols_y_template : ncols_y_par; + const int row = blockIdx.x*blockDim.y + threadIdx.y; - if (row >= nrows) { + if (row >= nrows_x) { return; } - const int blocks_per_row = ncols / qk; + const int blocks_per_row_x = ncols_x / qk; + const int blocks_per_col_y = nrows_y / QK8_1; const int blocks_per_warp = vdr * WARP_SIZE / qi; // partial sum for each thread - float tmp = 0.0f; + float tmp[ncols_y_template != 0 ? ncols_y_template : 8] = {0.0f}; const block_q_t * x = (const block_q_t *) vx; const block_q8_1 * y = (const block_q8_1 *) vy; - for (int i = threadIdx.x / (qi/vdr); i < blocks_per_row; i += blocks_per_warp) { - const int ibx = row*blocks_per_row + i; // x block index + for (int i = threadIdx.x / (qi/vdr); i < blocks_per_row_x; i += blocks_per_warp) { + const int ibx = row*blocks_per_row_x + i; // x block index const int iby = i * (qk/QK8_1); // y block index that aligns with ibx const int iqs = vdr * (threadIdx.x % (qi/vdr)); // x block quant index when casting the quants to int - tmp += vec_dot_q_cuda(&x[ibx], &y[iby], iqs); +#pragma unroll + for (int j = 0; j < ncols_y; ++j) { + tmp[j] += vec_dot_q_cuda(&x[ibx], &y[j*blocks_per_col_y + iby], iqs); + } } // sum up partial sums and write back result #pragma unroll - for (int mask = 16; mask > 0; mask >>= 1) { - tmp += __shfl_xor_sync(0xffffffff, tmp, mask, 32); - } + for (int j = 0; j < ncols_y; ++j) { + tmp[j] = warp_reduce_sum(tmp[j]); - if (threadIdx.x == 0) { - dst[row] = tmp; + if (threadIdx.x == 0) { + dst[j*nrows_x + row] = tmp[j]; + } } } @@ -6816,121 +6825,56 @@ static void convert_mul_mat_vec_f16_cuda(const void * vx, const dfloat * y, floa <<>>(vx, y, dst, ncols, nrows); } -static void mul_mat_vec_q4_0_q8_1_cuda(const void * vx, const void * vy, float * dst, const int ncols, const int nrows, cudaStream_t stream) { - GGML_ASSERT(ncols % QK4_0 == 0); - const int block_num_y = (nrows + 
GGML_CUDA_MMV_Y - 1) / GGML_CUDA_MMV_Y; - const dim3 block_nums(block_num_y, 1, 1); - const dim3 block_dims(WARP_SIZE, GGML_CUDA_MMV_Y, 1); - mul_mat_vec_q - <<>>(vx, vy, dst, ncols, nrows); -} +template +static void mul_mat_vec_q_cuda( + const void * vx, const void * vy, float * dst, + const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, cudaStream_t stream) { -static void mul_mat_vec_q4_1_q8_1_cuda(const void * vx, const void * vy, float * dst, const int ncols, const int nrows, cudaStream_t stream) { - GGML_ASSERT(ncols % QK4_1 == 0); - const int block_num_y = (nrows + GGML_CUDA_MMV_Y - 1) / GGML_CUDA_MMV_Y; - const dim3 block_nums(block_num_y, 1, 1); - const dim3 block_dims(WARP_SIZE, GGML_CUDA_MMV_Y, 1); - mul_mat_vec_q - <<>>(vx, vy, dst, ncols, nrows); -} + GGML_ASSERT(ncols_x % qk == 0); + GGML_ASSERT(ncols_y <= 8); -static void mul_mat_vec_q5_0_q8_1_cuda(const void * vx, const void * vy, float * dst, const int ncols, const int nrows, cudaStream_t stream) { - GGML_ASSERT(ncols % QK5_0 == 0); - const int block_num_y = (nrows + GGML_CUDA_MMV_Y - 1) / GGML_CUDA_MMV_Y; + const int block_num_y = (nrows_x + GGML_CUDA_MMV_Y - 1) / GGML_CUDA_MMV_Y; const dim3 block_nums(block_num_y, 1, 1); const dim3 block_dims(WARP_SIZE, GGML_CUDA_MMV_Y, 1); - mul_mat_vec_q - <<>>(vx, vy, dst, ncols, nrows); -} - -static void mul_mat_vec_q5_1_q8_1_cuda(const void * vx, const void * vy, float * dst, const int ncols, const int nrows, cudaStream_t stream) { - GGML_ASSERT(ncols % QK5_1 == 0); - const int block_num_y = (nrows + GGML_CUDA_MMV_Y - 1) / GGML_CUDA_MMV_Y; - const dim3 block_nums(block_num_y, 1, 1); - const dim3 block_dims(WARP_SIZE, GGML_CUDA_MMV_Y, 1); - mul_mat_vec_q - <<>>(vx, vy, dst, ncols, nrows); -} - -static void mul_mat_vec_q8_0_q8_1_cuda(const void * vx, const void * vy, float * dst, const int ncols, const int nrows, cudaStream_t stream) { - GGML_ASSERT(ncols % QK8_0 == 0); - const int block_num_y = (nrows + GGML_CUDA_MMV_Y - 1) / 
GGML_CUDA_MMV_Y; - const dim3 block_nums(block_num_y, 1, 1); - const dim3 block_dims(WARP_SIZE, GGML_CUDA_MMV_Y, 1); - mul_mat_vec_q - <<>>(vx, vy, dst, ncols, nrows); -} - -static void mul_mat_vec_q2_K_q8_1_cuda(const void * vx, const void * vy, float * dst, const int ncols, const int nrows, cudaStream_t stream) { - GGML_ASSERT(ncols % QK_K == 0); - const int block_num_y = (nrows + GGML_CUDA_MMV_Y - 1) / GGML_CUDA_MMV_Y; - const dim3 block_nums(block_num_y, 1, 1); - const dim3 block_dims(WARP_SIZE, GGML_CUDA_MMV_Y, 1); - mul_mat_vec_q - <<>>(vx, vy, dst, ncols, nrows); -} - -static void mul_mat_vec_q3_K_q8_1_cuda(const void * vx, const void * vy, float * dst, const int ncols, const int nrows, cudaStream_t stream) { - GGML_ASSERT(ncols % QK_K == 0); - const int block_num_y = (nrows + GGML_CUDA_MMV_Y - 1) / GGML_CUDA_MMV_Y; - const dim3 block_nums(block_num_y, 1, 1); - const dim3 block_dims(WARP_SIZE, GGML_CUDA_MMV_Y, 1); - mul_mat_vec_q - <<>>(vx, vy, dst, ncols, nrows); -} - -static void mul_mat_vec_q4_K_q8_1_cuda(const void * vx, const void * vy, float * dst, const int ncols, const int nrows, cudaStream_t stream) { - GGML_ASSERT(ncols % QK_K == 0); - const int block_num_y = (nrows + GGML_CUDA_MMV_Y - 1) / GGML_CUDA_MMV_Y; - const dim3 block_nums(block_num_y, 1, 1); - const dim3 block_dims(WARP_SIZE, GGML_CUDA_MMV_Y, 1); - mul_mat_vec_q - <<>>(vx, vy, dst, ncols, nrows); -} - -static void mul_mat_vec_q5_K_q8_1_cuda(const void * vx, const void * vy, float * dst, const int ncols, const int nrows, cudaStream_t stream) { - GGML_ASSERT(ncols % QK_K == 0); - const int block_num_y = (nrows + GGML_CUDA_MMV_Y - 1) / GGML_CUDA_MMV_Y; - const dim3 block_nums(block_num_y, 1, 1); - const dim3 block_dims(WARP_SIZE, GGML_CUDA_MMV_Y, 1); - mul_mat_vec_q - <<>>(vx, vy, dst, ncols, nrows); -} - -static void mul_mat_vec_q6_K_q8_1_cuda(const void * vx, const void * vy, float * dst, const int ncols, const int nrows, cudaStream_t stream) { - GGML_ASSERT(ncols % QK_K == 0); - const int 
block_num_y = (nrows + GGML_CUDA_MMV_Y - 1) / GGML_CUDA_MMV_Y; - const dim3 block_nums(block_num_y, 1, 1); - const dim3 block_dims(WARP_SIZE, GGML_CUDA_MMV_Y, 1); - mul_mat_vec_q - <<>>(vx, vy, dst, ncols, nrows); -} - -static void mul_mat_vec_iq2_xxs_q8_1_cuda(const void * vx, const void * vy, float * dst, const int ncols, const int nrows, cudaStream_t stream) { - GGML_ASSERT(ncols % QK_K == 0); - const int block_num_y = (nrows + GGML_CUDA_MMV_Y - 1) / GGML_CUDA_MMV_Y; - const dim3 block_nums(block_num_y, 1, 1); - const dim3 block_dims(WARP_SIZE, GGML_CUDA_MMV_Y, 1); - mul_mat_vec_q - <<>>(vx, vy, dst, ncols, nrows); -} - -static void mul_mat_vec_iq2_xs_q8_1_cuda(const void * vx, const void * vy, float * dst, const int ncols, const int nrows, cudaStream_t stream) { - GGML_ASSERT(ncols % QK_K == 0); - const int block_num_y = (nrows + GGML_CUDA_MMV_Y - 1) / GGML_CUDA_MMV_Y; - const dim3 block_nums(block_num_y, 1, 1); - const dim3 block_dims(WARP_SIZE, GGML_CUDA_MMV_Y, 1); - mul_mat_vec_q - <<>>(vx, vy, dst, ncols, nrows); -} - -static void mul_mat_vec_iq3_xxs_q8_1_cuda(const void * vx, const void * vy, float * dst, const int ncols, const int nrows, cudaStream_t stream) { - GGML_ASSERT(ncols % QK_K == 0); - const int block_num_y = (nrows + GGML_CUDA_MMV_Y - 1) / GGML_CUDA_MMV_Y; - const dim3 block_nums(block_num_y, 1, 1); - const dim3 block_dims(WARP_SIZE, GGML_CUDA_MMV_Y, 1); - mul_mat_vec_q - <<>>(vx, vy, dst, ncols, nrows); + switch (ncols_y) { + case 1: + mul_mat_vec_q<1, qk, qi, block_q_t, vdr, vec_dot> + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); + break; + case 2: + mul_mat_vec_q<2, qk, qi, block_q_t, vdr, vec_dot> + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); + break; + case 3: + mul_mat_vec_q<3, qk, qi, block_q_t, vdr, vec_dot> + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); + break; + case 4: + mul_mat_vec_q<4, qk, qi, block_q_t, vdr, vec_dot> + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); + break; + case 5: + 
mul_mat_vec_q<5, qk, qi, block_q_t, vdr, vec_dot> + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); + break; + case 6: + mul_mat_vec_q<6, qk, qi, block_q_t, vdr, vec_dot> + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); + break; + case 7: + mul_mat_vec_q<7, qk, qi, block_q_t, vdr, vec_dot> + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); + break; + case 8: + mul_mat_vec_q<8, qk, qi, block_q_t, vdr, vec_dot> + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); + break; + default: + GGML_ASSERT(false); + // mul_mat_vec_q<0, qk, qi, block_q_t, vdr, vec_dot> + // <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); + break; + } } static void ggml_mul_mat_q4_0_q8_1_cuda( @@ -8578,50 +8522,61 @@ static void ggml_cuda_op_mul_mat_vec_q( const char * src1_ddq_i, float * dst_dd_i, const int64_t row_low, const int64_t row_high, const int64_t src1_ncols, const int64_t src1_padded_row_size, cudaStream_t stream) { - GGML_ASSERT(ggml_nrows(src1) == 1); - const int64_t ne00 = src0->ne[0]; const int64_t row_diff = row_high - row_low; switch (src0->type) { case GGML_TYPE_Q4_0: - mul_mat_vec_q4_0_q8_1_cuda(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); + mul_mat_vec_q_cuda + (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, stream); break; case GGML_TYPE_Q4_1: - mul_mat_vec_q4_1_q8_1_cuda(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); + mul_mat_vec_q_cuda + (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, stream); break; case GGML_TYPE_Q5_0: - mul_mat_vec_q5_0_q8_1_cuda(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); + mul_mat_vec_q_cuda + (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, stream); break; case GGML_TYPE_Q5_1: - mul_mat_vec_q5_1_q8_1_cuda(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); + mul_mat_vec_q_cuda + (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, 
stream); break; case GGML_TYPE_Q8_0: - mul_mat_vec_q8_0_q8_1_cuda(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); + mul_mat_vec_q_cuda + (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, stream); break; case GGML_TYPE_Q2_K: - mul_mat_vec_q2_K_q8_1_cuda(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); + mul_mat_vec_q_cuda + (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, stream); break; case GGML_TYPE_Q3_K: - mul_mat_vec_q3_K_q8_1_cuda(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); + mul_mat_vec_q_cuda + (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, stream); break; case GGML_TYPE_Q4_K: - mul_mat_vec_q4_K_q8_1_cuda(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); + mul_mat_vec_q_cuda + (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, stream); break; case GGML_TYPE_Q5_K: - mul_mat_vec_q5_K_q8_1_cuda(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); + mul_mat_vec_q_cuda + (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, stream); break; case GGML_TYPE_Q6_K: - mul_mat_vec_q6_K_q8_1_cuda(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); + mul_mat_vec_q_cuda + (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, stream); break; case GGML_TYPE_IQ2_XXS: - mul_mat_vec_iq2_xxs_q8_1_cuda(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); + mul_mat_vec_q_cuda + (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, stream); break; case GGML_TYPE_IQ2_XS: - mul_mat_vec_iq2_xs_q8_1_cuda(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); + mul_mat_vec_q_cuda + (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, stream); break; case GGML_TYPE_IQ3_XXS: - mul_mat_vec_iq3_xxs_q8_1_cuda(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); + 
mul_mat_vec_q_cuda + (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, stream); break; default: GGML_ASSERT(false); @@ -9945,17 +9900,18 @@ static void ggml_cuda_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1 #ifdef GGML_CUDA_FORCE_DMMV const bool use_mul_mat_vec_q = false; #else - const bool use_mul_mat_vec_q = min_compute_capability >= MIN_CC_DP4A && ggml_is_quantized(src0->type) && ggml_nrows(src1) == 1; + const bool use_mul_mat_vec_q = min_compute_capability >= MIN_CC_DP4A && ggml_is_quantized(src0->type); #endif // GGML_CUDA_FORCE_DMMV if (use_mul_mat_vec_q) { - // NOTE: this kernel does not support ggml_nrows(src1) > 1 ggml_cuda_op_mul_mat(src0, src1, dst, ggml_cuda_op_mul_mat_vec_q, true); } else { ggml_cuda_op_mul_mat(src0, src1, dst, ggml_cuda_op_dequantize_mul_mat_vec, false); } } else { - if (use_mul_mat_q) { + if (src1->ne[1] <= 8 && min_compute_capability >= MIN_CC_DP4A && ggml_is_quantized(src0->type)) { + ggml_cuda_op_mul_mat(src0, src1, dst, ggml_cuda_op_mul_mat_vec_q, true); + } else if (use_mul_mat_q) { ggml_cuda_op_mul_mat(src0, src1, dst, ggml_cuda_op_mul_mat_q, true); } else { ggml_cuda_op_mul_mat(src0, src1, dst, ggml_cuda_op_mul_mat_cublas, false); From 2e9c0bd6b301155ce749e162527fc55e9fb5b832 Mon Sep 17 00:00:00 2001 From: BarfingLemurs <128182951+BarfingLemurs@users.noreply.github.com> Date: Tue, 6 Feb 2024 09:06:48 -0500 Subject: [PATCH 40/94] readme : add phi, orion 14b, internlm2, and yi-VL to readme (#5362) --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index bb6c49338..cc87ac797 100644 --- a/README.md +++ b/README.md @@ -105,11 +105,14 @@ improved significantly thanks to many contributions. 
It is the main playground f - [X] [MPT](https://github.com/ggerganov/llama.cpp/pull/3417) - [X] [Bloom](https://github.com/ggerganov/llama.cpp/pull/3553) - [x] [Yi models](https://huggingface.co/models?search=01-ai/Yi) -- [X] [StableLM-3b-4e1t](https://github.com/ggerganov/llama.cpp/pull/3586) +- [X] [StableLM models](https://huggingface.co/stabilityai) - [x] [Deepseek models](https://huggingface.co/models?search=deepseek-ai/deepseek) - [x] [Qwen models](https://huggingface.co/models?search=Qwen/Qwen) - [x] [PLaMo-13B](https://github.com/ggerganov/llama.cpp/pull/3557) +- [x] [Phi models](https://huggingface.co/models?search=microsoft/phi) - [x] [GPT-2](https://huggingface.co/gpt2) +- [x] [Orion 14B](https://github.com/ggerganov/llama.cpp/pull/5118) +- [x] [InternLM2](https://huggingface.co/models?search=internlm2) - [x] [CodeShell](https://github.com/WisdomShell/codeshell) **Multimodal models:** @@ -119,6 +122,7 @@ improved significantly thanks to many contributions. It is the main playground f - [x] [Obsidian](https://huggingface.co/NousResearch/Obsidian-3B-V0.5) - [x] [ShareGPT4V](https://huggingface.co/models?search=Lin-Chen/ShareGPT4V) - [x] [MobileVLM 1.7B/3B models](https://huggingface.co/models?search=mobileVLM) +- [x] [Yi-VL](https://huggingface.co/models?search=Yi-VL) **Bindings:** From f57fadc009cbff741a1961cb7896c47d73978d2c Mon Sep 17 00:00:00 2001 From: Kawrakow <48489457+ikawrakow@users.noreply.github.com> Date: Tue, 6 Feb 2024 17:28:02 +0200 Subject: [PATCH 41/94] Slight quantization improvement for Q4_K and Q5_K (#5361) * Q4_K: slightly better quantization * Q5_K: slightly better quantization --------- Co-authored-by: Iwan Kawrakow --- ggml-quants.c | 75 +++++++++++++++++++++++---------------------------- 1 file changed, 33 insertions(+), 42 deletions(-) diff --git a/ggml-quants.c b/ggml-quants.c index 014c0525a..101d3e783 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -2381,19 +2381,20 @@ static void quantize_row_q4_K_impl(const float * restrict 
x, block_q4_K * restri uint8_t L[QK_K]; uint8_t Laux[32]; + uint8_t Ls[QK_K/32]; + uint8_t Lm[QK_K/32]; float weights[32]; - float mins[QK_K/32]; - float scales[QK_K/32]; + float sw[QK_K/32]; + float mins[QK_K/32]; + float scales[QK_K/32]; for (int i = 0; i < nb; i++) { float sum_x2 = 0; for (int l = 0; l < QK_K; ++l) sum_x2 += x[l] * x[l]; - float sigma2 = sum_x2/QK_K; + float sigma2 = 2*sum_x2/QK_K; float av_x = sqrtf(sigma2); - float max_scale = 0; // as we are deducting the min, scales are always positive - float max_min = 0; for (int j = 0; j < QK_K/32; ++j) { if (quant_weights) { const float * qw = quant_weights + QK_K*i + 32*j; @@ -2401,25 +2402,17 @@ static void quantize_row_q4_K_impl(const float * restrict x, block_q4_K * restri } else { for (int l = 0; l < 32; ++l) weights[l] = av_x + fabsf(x[32*j + l]); } + float sumw = 0; + for (int l = 0; l < 32; ++l) sumw += weights[l]; + sw[j] = sumw; scales[j] = make_qkx3_quants(32, 15, x + 32*j, weights, L + 32*j, &mins[j], Laux, -0.9f, 0.05f, 36, false); - //scales[j] = make_qkx2_quants(32, 15, x + 32*j, weights, L + 32*j, &mins[j], Laux, -1.f, 0.1f, 20, false); - float scale = scales[j]; - if (scale > max_scale) { - max_scale = scale; - } - float min = mins[j]; - if (min > max_min) { - max_min = min; - } } - float inv_scale = max_scale > 0 ? 63.f/max_scale : 0.f; - float inv_min = max_min > 0 ? 
63.f/max_min : 0.f; + float d_block = make_qp_quants(QK_K/32, 63, scales, Ls, sw); + float m_block = make_qp_quants(QK_K/32, 63, mins, Lm, sw); for (int j = 0; j < QK_K/32; ++j) { - uint8_t ls = nearest_int(inv_scale*scales[j]); - uint8_t lm = nearest_int(inv_min*mins[j]); - ls = MIN(63, ls); - lm = MIN(63, lm); + uint8_t ls = Ls[j]; + uint8_t lm = Lm[j]; if (j < 4) { y[i].scales[j] = ls; y[i].scales[j+4] = lm; @@ -2429,8 +2422,8 @@ static void quantize_row_q4_K_impl(const float * restrict x, block_q4_K * restri y[i].scales[j-0] |= ((lm >> 4) << 6); } } - y[i].d = GGML_FP32_TO_FP16(max_scale/63.f); - y[i].dmin = GGML_FP32_TO_FP16(max_min/63.f); + y[i].d = GGML_FP32_TO_FP16(d_block); + y[i].dmin = GGML_FP32_TO_FP16(m_block); uint8_t sc, m; for (int j = 0; j < QK_K/32; ++j) { @@ -2688,20 +2681,21 @@ static void quantize_row_q5_K_impl(const float * restrict x, block_q5_K * restri const int nb = n_per_row / QK_K; uint8_t L[QK_K]; - float mins[QK_K/32]; - float scales[QK_K/32]; - float weights[32]; uint8_t Laux[32]; + uint8_t Ls[QK_K/32]; + uint8_t Lm[QK_K/32]; + float mins[QK_K/32]; + float scales[QK_K/32]; + float sw[QK_K/32]; + float weights[32]; for (int i = 0; i < nb; i++) { float sum_x2 = 0; for (int l = 0; l < QK_K; ++l) sum_x2 += x[l] * x[l]; - float sigma2 = sum_x2/QK_K; + float sigma2 = 2*sum_x2/QK_K; float av_x = sqrtf(sigma2); - float max_scale = 0; // as we are deducting the min, scales are always positive - float max_min = 0; for (int j = 0; j < QK_K/32; ++j) { if (quant_weights) { const float * qw = quant_weights + QK_K*i + 32*j; @@ -2709,22 +2703,19 @@ static void quantize_row_q5_K_impl(const float * restrict x, block_q5_K * restri } else { for (int l = 0; l < 32; ++l) weights[l] = av_x + fabsf(x[32*j + l]); } + float sumw = 0; + for (int l = 0; l < 32; ++l) sumw += weights[l]; + sw[j] = sumw; + scales[j] = make_qkx3_quants(32, 31, x + 32*j, weights, L + 32*j, &mins[j], Laux, -0.9f, 0.05f, 36, false); - float scale = scales[j]; - if (scale > max_scale) { 
- max_scale = scale; - } - float min = mins[j]; - if (min > max_min) { - max_min = min; - } } - float inv_scale = max_scale > 0 ? 63.f/max_scale : 0.f; - float inv_min = max_min > 0 ? 63.f/max_min : 0.f; + float d_block = make_qp_quants(QK_K/32, 63, scales, Ls, sw); + float m_block = make_qp_quants(QK_K/32, 63, mins, Lm, sw); + for (int j = 0; j < QK_K/32; ++j) { - uint8_t ls = nearest_int(inv_scale*scales[j]); - uint8_t lm = nearest_int(inv_min*mins[j]); + uint8_t ls = Ls[j]; + uint8_t lm = Lm[j]; ls = MIN(63, ls); lm = MIN(63, lm); if (j < 4) { @@ -2736,8 +2727,8 @@ static void quantize_row_q5_K_impl(const float * restrict x, block_q5_K * restri y[i].scales[j-0] |= ((lm >> 4) << 6); } } - y[i].d = GGML_FP32_TO_FP16(max_scale/63.f); - y[i].dmin = GGML_FP32_TO_FP16(max_min/63.f); + y[i].d = GGML_FP32_TO_FP16(d_block); + y[i].dmin = GGML_FP32_TO_FP16(m_block); uint8_t sc, m; for (int j = 0; j < QK_K/32; ++j) { From b08f22c882a1443e6b97081f3ce718a4d1a741f8 Mon Sep 17 00:00:00 2001 From: Kawrakow <48489457+ikawrakow@users.noreply.github.com> Date: Tue, 6 Feb 2024 19:00:16 +0200 Subject: [PATCH 42/94] Update README.md (#5366) Add some links to quantization related PRs --- README.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index cc87ac797..34f2021f9 100644 --- a/README.md +++ b/README.md @@ -736,9 +736,21 @@ Several quantization methods are supported. 
They differ in the resulting model d | 13B | bits/weight | 16.0 | 4.5 | 5.0 | 5.5 | 6.0 | 8.5 | - [k-quants](https://github.com/ggerganov/llama.cpp/pull/1684) -- recent k-quants improvements +- recent k-quants improvements and new i-quants - [#2707](https://github.com/ggerganov/llama.cpp/pull/2707) - [#2807](https://github.com/ggerganov/llama.cpp/pull/2807) + - [#4773 - 2-bit i-quants (inference)](https://github.com/ggerganov/llama.cpp/pull/4773) + - [#4856 - 2-bit i-quants (inference)](https://github.com/ggerganov/llama.cpp/pull/4856) + - [#4861 - importance matrix](https://github.com/ggerganov/llama.cpp/pull/4861) + - [#4872 - MoE models](https://github.com/ggerganov/llama.cpp/pull/4872) + - [#4897 - 2-bit quantization](https://github.com/ggerganov/llama.cpp/pull/4897) + - [#4930 - imatrix for all k-quants](https://github.com/ggerganov/llama.cpp/pull/4930) + - [#4951 - imatrix on the GPU](https://github.com/ggerganov/llama.cpp/pull/4957) + - [#4969 - imatrix for legacy quants](https://github.com/ggerganov/llama.cpp/pull/4969) + - [#4996 - k-qunats tuning](https://github.com/ggerganov/llama.cpp/pull/4996) + - [#5060 - Q3_K_XS](https://github.com/ggerganov/llama.cpp/pull/5060) + - [#5196 - 3-bit i-quants](https://github.com/ggerganov/llama.cpp/pull/5196) + - [quantization tuning](https://github.com/ggerganov/llama.cpp/pull/5320), [another one](https://github.com/ggerganov/llama.cpp/pull/5334), and [another one](https://github.com/ggerganov/llama.cpp/pull/5361) ### Perplexity (measuring model quality) From 17c97fb0620448b37516a3f53fea6c482b0a30a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Tue, 6 Feb 2024 18:43:06 +0100 Subject: [PATCH 43/94] CUDA: mul_mat_vec_q max. 
batch size 8 -> 4 (#5370) --- ggml-cuda.cu | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 95161b3f4..3b828375e 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -6831,7 +6831,7 @@ static void mul_mat_vec_q_cuda( const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, cudaStream_t stream) { GGML_ASSERT(ncols_x % qk == 0); - GGML_ASSERT(ncols_y <= 8); + GGML_ASSERT(ncols_y <= 4); const int block_num_y = (nrows_x + GGML_CUDA_MMV_Y - 1) / GGML_CUDA_MMV_Y; const dim3 block_nums(block_num_y, 1, 1); @@ -6853,22 +6853,22 @@ static void mul_mat_vec_q_cuda( mul_mat_vec_q<4, qk, qi, block_q_t, vdr, vec_dot> <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); break; - case 5: - mul_mat_vec_q<5, qk, qi, block_q_t, vdr, vec_dot> - <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); - break; - case 6: - mul_mat_vec_q<6, qk, qi, block_q_t, vdr, vec_dot> - <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); - break; - case 7: - mul_mat_vec_q<7, qk, qi, block_q_t, vdr, vec_dot> - <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); - break; - case 8: - mul_mat_vec_q<8, qk, qi, block_q_t, vdr, vec_dot> - <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); - break; + // case 5: + // mul_mat_vec_q<5, qk, qi, block_q_t, vdr, vec_dot> + // <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); + // break; + // case 6: + // mul_mat_vec_q<6, qk, qi, block_q_t, vdr, vec_dot> + // <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); + // break; + // case 7: + // mul_mat_vec_q<7, qk, qi, block_q_t, vdr, vec_dot> + // <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); + // break; + // case 8: + // mul_mat_vec_q<8, qk, qi, block_q_t, vdr, vec_dot> + // <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); + // break; default: GGML_ASSERT(false); // mul_mat_vec_q<0, qk, qi, block_q_t, vdr, vec_dot> @@ -9909,7 +9909,7 @@ static void ggml_cuda_mul_mat(const ggml_tensor 
* src0, const ggml_tensor * src1 ggml_cuda_op_mul_mat(src0, src1, dst, ggml_cuda_op_dequantize_mul_mat_vec, false); } } else { - if (src1->ne[1] <= 8 && min_compute_capability >= MIN_CC_DP4A && ggml_is_quantized(src0->type)) { + if (src1->ne[1] <= 4 && min_compute_capability >= MIN_CC_DP4A && ggml_is_quantized(src0->type)) { ggml_cuda_op_mul_mat(src0, src1, dst, ggml_cuda_op_mul_mat_vec_q, true); } else if (use_mul_mat_q) { ggml_cuda_op_mul_mat(src0, src1, dst, ggml_cuda_op_mul_mat_q, true); From 213d1439fadefe182f69c5f7e8dd3b4b6572ebcb Mon Sep 17 00:00:00 2001 From: Alexey Parfenov Date: Tue, 6 Feb 2024 18:08:38 +0000 Subject: [PATCH 44/94] server : remove model.json endpoint (#5371) --- examples/server/completion.js.hpp | 448 +++++++++++++++------------ examples/server/public/completion.js | 3 +- examples/server/server.cpp | 11 - 3 files changed, 244 insertions(+), 218 deletions(-) diff --git a/examples/server/completion.js.hpp b/examples/server/completion.js.hpp index fe5f81228..f5e696e17 100644 --- a/examples/server/completion.js.hpp +++ b/examples/server/completion.js.hpp @@ -236,214 +236,250 @@ unsigned char completion_js[] = { 0x20, 0x4a, 0x53, 0x4f, 0x4e, 0x2e, 0x70, 0x61, 0x72, 0x73, 0x65, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65, 0x2e, 0x65, - 0x72, 0x72, 0x6f, 0x72, 0x28, 0x60, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2e, - 0x63, 0x70, 0x70, 0x20, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3a, 0x20, 0x24, - 0x7b, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, - 0x72, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x7d, 0x60, 0x29, - 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, + 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, + 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 
0x72, 0x2e, 0x63, 0x6f, 0x6e, 0x74, + 0x65, 0x6e, 0x74, 0x2e, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x73, + 0x28, 0x27, 0x73, 0x6c, 0x6f, 0x74, 0x20, 0x75, 0x6e, 0x61, 0x76, 0x61, + 0x69, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x27, 0x29, 0x29, 0x20, 0x7b, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x54, 0x68, 0x72, 0x6f, 0x77, 0x20, 0x61, + 0x6e, 0x20, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20, 0x62, + 0x65, 0x20, 0x63, 0x61, 0x75, 0x67, 0x68, 0x74, 0x20, 0x62, 0x79, 0x20, + 0x75, 0x70, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x20, 0x63, 0x61, 0x6c, + 0x6c, 0x65, 0x72, 0x73, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x74, 0x68, 0x72, 0x6f, 0x77, + 0x20, 0x6e, 0x65, 0x77, 0x20, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x28, 0x27, + 0x73, 0x6c, 0x6f, 0x74, 0x20, 0x75, 0x6e, 0x61, 0x76, 0x61, 0x69, 0x6c, + 0x61, 0x62, 0x6c, 0x65, 0x27, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x20, 0x65, 0x6c, + 0x73, 0x65, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, + 0x6c, 0x65, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x28, 0x60, 0x6c, 0x6c, + 0x61, 0x6d, 0x61, 0x2e, 0x63, 0x70, 0x70, 0x20, 0x65, 0x72, 0x72, 0x6f, + 0x72, 0x3a, 0x20, 0x24, 0x7b, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, + 0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, + 0x74, 0x7d, 0x60, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, - 0x7d, 0x0a, 0x20, 0x20, 0x7d, 0x20, 0x63, 0x61, 0x74, 0x63, 0x68, 0x20, - 0x28, 0x65, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, - 0x20, 0x28, 0x65, 0x2e, 0x6e, 0x61, 0x6d, 0x65, 0x20, 0x21, 0x3d, 0x3d, - 0x20, 0x27, 0x41, 0x62, 0x6f, 0x72, 0x74, 0x45, 0x72, 0x72, 0x6f, 0x72, - 0x27, 0x29, 0x20, 0x7b, 0x0a, 0x20, 
0x20, 0x20, 0x20, 0x20, 0x20, 0x63, - 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, - 0x28, 0x22, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x20, 0x65, 0x72, 0x72, 0x6f, - 0x72, 0x3a, 0x20, 0x22, 0x2c, 0x20, 0x65, 0x29, 0x3b, 0x0a, 0x20, 0x20, - 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x74, 0x68, 0x72, 0x6f, - 0x77, 0x20, 0x65, 0x3b, 0x0a, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x66, - 0x69, 0x6e, 0x61, 0x6c, 0x6c, 0x79, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, - 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2e, - 0x61, 0x62, 0x6f, 0x72, 0x74, 0x28, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x7d, - 0x0a, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x63, - 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, - 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x6c, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, - 0x2c, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x61, 0x6e, 0x20, - 0x65, 0x76, 0x65, 0x6e, 0x74, 0x20, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, - 0x20, 0x74, 0x68, 0x61, 0x74, 0x20, 0x79, 0x6f, 0x75, 0x20, 0x63, 0x61, - 0x6e, 0x20, 0x73, 0x75, 0x62, 0x73, 0x63, 0x72, 0x69, 0x62, 0x65, 0x20, - 0x74, 0x6f, 0x0a, 0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x45, 0x78, 0x61, - 0x6d, 0x70, 0x6c, 0x65, 0x3a, 0x0a, 0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, - 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x7b, 0x20, - 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, - 0x72, 0x67, 0x65, 0x74, 0x20, 0x7d, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, - 0x27, 0x2f, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, - 0x2e, 0x6a, 0x73, 0x27, 0x0a, 0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, - 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x6e, - 0x20, 0x3d, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x45, 0x76, 0x65, 0x6e, - 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x28, 0x70, 0x72, 0x6f, 0x6d, - 0x70, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, - 0x6e, 0x6e, 0x2e, 0x61, 0x64, 0x64, 
0x45, 0x76, 0x65, 0x6e, 0x74, 0x4c, - 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65, 0x72, 0x28, 0x22, 0x6d, 0x65, 0x73, - 0x73, 0x61, 0x67, 0x65, 0x22, 0x2c, 0x20, 0x28, 0x63, 0x68, 0x75, 0x6e, - 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x2f, 0x2f, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, - 0x2e, 0x77, 0x72, 0x69, 0x74, 0x65, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, - 0x2e, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x2e, 0x63, 0x6f, 0x6e, 0x74, - 0x65, 0x6e, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x7d, - 0x29, 0x0a, 0x2f, 0x2f, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, - 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x45, - 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x3d, - 0x20, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70, 0x61, - 0x72, 0x61, 0x6d, 0x73, 0x20, 0x3d, 0x20, 0x7b, 0x7d, 0x2c, 0x20, 0x63, - 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x20, 0x3d, 0x20, 0x7b, 0x7d, 0x29, 0x20, - 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, - 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, - 0x20, 0x3d, 0x20, 0x6e, 0x65, 0x77, 0x20, 0x45, 0x76, 0x65, 0x6e, 0x74, - 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x28, 0x29, 0x3b, 0x0a, 0x20, 0x20, - 0x28, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, 0x29, 0x20, 0x3d, 0x3e, - 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63, - 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, - 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, - 0x69, 0x74, 0x20, 0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, - 0x75, 0x6e, 0x6b, 0x20, 0x6f, 0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, - 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, - 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x29, - 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, - 0x20, 0x28, 0x63, 0x68, 0x75, 0x6e, 
0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, - 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x2b, 0x3d, 0x20, 0x63, - 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x63, 0x6f, - 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, - 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, 0x45, - 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, 0x73, - 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x6d, 0x65, - 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, - 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, - 0x64, 0x61, 0x74, 0x61, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x72, 0x65, + 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x20, + 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, + 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x4a, 0x53, 0x4f, 0x4e, 0x2e, 0x70, 0x61, + 0x72, 0x73, 0x65, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, + 0x72, 0x72, 0x6f, 0x72, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, + 0x6c, 0x65, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x28, 0x60, 0x6c, 0x6c, + 0x61, 0x6d, 0x61, 0x2e, 0x63, 0x70, 0x70, 0x20, 0x65, 0x72, 0x72, 0x6f, + 0x72, 0x3a, 0x20, 0x24, 0x7b, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, + 0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, + 0x74, 0x7d, 0x60, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x69, 0x66, 0x20, 0x28, 0x63, 0x68, 
0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, - 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, - 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, - 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, - 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, - 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, - 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, - 0x65, 0x6e, 0x74, 0x28, 0x22, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, - 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, - 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, - 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, - 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, - 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x7d, 0x29, 0x29, 0x3b, - 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, - 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74, 0x69, 0x6d, 0x69, 0x6e, 0x67, - 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, - 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, 0x45, 0x76, 0x65, - 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, 0x73, 0x74, 0x6f, - 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x74, 0x69, 0x6d, 0x69, - 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, - 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, - 0x74, 0x61, 0x2e, 0x74, 0x69, 0x6d, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x7d, - 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, - 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, - 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, - 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, 
0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, - 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, - 0x65, 0x6e, 0x74, 0x28, 0x22, 0x64, 0x6f, 0x6e, 0x65, 0x22, 0x2c, 0x20, - 0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x7b, 0x20, - 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x7d, 0x20, 0x7d, 0x29, - 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x7d, 0x29, 0x28, 0x29, 0x3b, 0x0a, 0x20, - 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x65, 0x76, 0x65, 0x6e, - 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x3b, 0x0a, 0x7d, 0x0a, 0x0a, - 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x6c, 0x20, 0x6c, 0x6c, 0x61, 0x6d, - 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x61, 0x20, - 0x70, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x20, 0x74, 0x68, 0x61, 0x74, - 0x20, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, 0x65, 0x73, 0x20, 0x74, 0x6f, - 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, - 0x65, 0x64, 0x20, 0x74, 0x65, 0x78, 0x74, 0x2e, 0x20, 0x54, 0x68, 0x69, - 0x73, 0x20, 0x64, 0x6f, 0x65, 0x73, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x73, - 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x73, 0x74, 0x72, 0x65, 0x61, - 0x6d, 0x69, 0x6e, 0x67, 0x0a, 0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x45, - 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x3a, 0x0a, 0x2f, 0x2f, 0x0a, 0x2f, - 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x50, - 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, - 0x74, 0x29, 0x2e, 0x74, 0x68, 0x65, 0x6e, 0x28, 0x28, 0x63, 0x6f, 0x6e, - 0x74, 0x65, 0x6e, 0x74, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x2f, - 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x63, 0x75, + 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x7d, 0x20, 0x63, 0x61, + 0x74, 0x63, 0x68, 0x20, 0x28, 0x65, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, + 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x65, 0x2e, 0x6e, 0x61, 0x6d, 0x65, + 0x20, 0x21, 0x3d, 0x3d, 0x20, 0x27, 
0x41, 0x62, 0x6f, 0x72, 0x74, 0x45, + 0x72, 0x72, 0x6f, 0x72, 0x27, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65, 0x2e, 0x65, + 0x72, 0x72, 0x6f, 0x72, 0x28, 0x22, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x20, + 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3a, 0x20, 0x22, 0x2c, 0x20, 0x65, 0x29, + 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, + 0x74, 0x68, 0x72, 0x6f, 0x77, 0x20, 0x65, 0x3b, 0x0a, 0x20, 0x20, 0x7d, + 0x0a, 0x20, 0x20, 0x66, 0x69, 0x6e, 0x61, 0x6c, 0x6c, 0x79, 0x20, 0x7b, + 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, + 0x6c, 0x65, 0x72, 0x2e, 0x61, 0x62, 0x6f, 0x72, 0x74, 0x28, 0x29, 0x3b, + 0x0a, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, + 0x72, 0x6e, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b, 0x0a, + 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x6c, 0x20, 0x6c, + 0x6c, 0x61, 0x6d, 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, + 0x20, 0x61, 0x6e, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x20, 0x74, 0x61, + 0x72, 0x67, 0x65, 0x74, 0x20, 0x74, 0x68, 0x61, 0x74, 0x20, 0x79, 0x6f, + 0x75, 0x20, 0x63, 0x61, 0x6e, 0x20, 0x73, 0x75, 0x62, 0x73, 0x63, 0x72, + 0x69, 0x62, 0x65, 0x20, 0x74, 0x6f, 0x0a, 0x2f, 0x2f, 0x0a, 0x2f, 0x2f, + 0x20, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x3a, 0x0a, 0x2f, 0x2f, + 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6f, 0x72, + 0x74, 0x20, 0x7b, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x45, 0x76, 0x65, + 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x7d, 0x20, 0x66, + 0x72, 0x6f, 0x6d, 0x20, 0x27, 0x2f, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, + 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6a, 0x73, 0x27, 0x0a, 0x2f, 0x2f, 0x0a, + 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, + 0x63, 0x6f, 0x6e, 0x6e, 0x20, 0x3d, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, + 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x28, + 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 
0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x20, + 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x6e, 0x2e, 0x61, 0x64, 0x64, 0x45, 0x76, + 0x65, 0x6e, 0x74, 0x4c, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65, 0x72, 0x28, + 0x22, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x2c, 0x20, 0x28, + 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, + 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69, 0x74, 0x65, 0x28, 0x63, - 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x6f, 0x72, 0x0a, 0x2f, 0x2f, 0x0a, 0x2f, 0x2f, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, - 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d, 0x20, 0x61, 0x77, 0x61, - 0x69, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x6d, - 0x69, 0x73, 0x65, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x0a, - 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, - 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69, 0x74, 0x65, 0x28, 0x63, 0x6f, - 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x0a, 0x65, 0x78, - 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, - 0x6c, 0x61, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x20, - 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70, - 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20, 0x3d, 0x20, 0x7b, 0x7d, 0x2c, 0x20, - 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x20, 0x3d, 0x20, 0x7b, 0x7d, 0x29, - 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, - 0x72, 0x6e, 0x20, 0x6e, 0x65, 0x77, 0x20, 0x50, 0x72, 0x6f, 0x6d, 0x69, - 0x73, 0x65, 0x28, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, 0x72, 0x65, - 0x73, 0x6f, 0x6c, 0x76, 0x65, 0x2c, 0x20, 0x72, 0x65, 0x6a, 0x65, 0x63, - 0x74, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, - 0x6c, 0x65, 0x74, 0x20, 0x63, 0x6f, 
0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, - 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x74, 0x72, - 0x79, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6f, - 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x28, 0x63, 0x6f, 0x6e, - 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, 0x66, 0x20, - 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, - 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f, - 0x6e, 0x66, 0x69, 0x67, 0x29, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, - 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, - 0x74, 0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b, 0x0a, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, 0x65, 0x28, 0x63, 0x6f, - 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, - 0x7d, 0x20, 0x63, 0x61, 0x74, 0x63, 0x68, 0x20, 0x28, 0x65, 0x72, 0x72, - 0x6f, 0x72, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x72, 0x65, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x65, 0x72, 0x72, 0x6f, 0x72, - 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x7d, - 0x29, 0x3b, 0x0a, 0x7d, 0x3b, 0x0a, 0x0a, 0x2f, 0x2a, 0x2a, 0x0a, 0x20, - 0x2a, 0x20, 0x28, 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, - 0x64, 0x29, 0x0a, 0x20, 0x2a, 0x2f, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, - 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, - 0x61, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x65, 0x20, 0x3d, 0x20, - 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, 0x70, 0x61, 0x72, 0x61, 0x6d, - 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, - 0x72, 0x2c, 0x20, 0x63, 0x61, 0x6c, 0x6c, 0x62, 0x61, 0x63, 0x6b, 0x29, - 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x66, 0x6f, 0x72, 0x20, - 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 
0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, - 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, 0x66, 0x20, 0x6c, 0x6c, - 0x61, 0x6d, 0x61, 0x28, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x70, - 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, - 0x73, 0x2c, 0x20, 0x7b, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, - 0x6c, 0x65, 0x72, 0x20, 0x7d, 0x29, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, - 0x20, 0x20, 0x63, 0x61, 0x6c, 0x6c, 0x62, 0x61, 0x63, 0x6b, 0x28, 0x63, - 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x7d, 0x0a, 0x7d, - 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x47, 0x65, 0x74, 0x20, 0x74, 0x68, 0x65, - 0x20, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x20, 0x69, 0x6e, 0x66, 0x6f, 0x20, - 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x74, 0x68, 0x65, 0x20, 0x73, 0x65, 0x72, - 0x76, 0x65, 0x72, 0x2e, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, - 0x20, 0x75, 0x73, 0x65, 0x66, 0x75, 0x6c, 0x20, 0x66, 0x6f, 0x72, 0x20, - 0x67, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x20, 0x74, 0x68, 0x65, 0x20, - 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x20, 0x77, 0x69, 0x6e, 0x64, - 0x6f, 0x77, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x73, 0x6f, 0x20, 0x6f, 0x6e, - 0x2e, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, - 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x4d, 0x6f, 0x64, 0x65, - 0x6c, 0x49, 0x6e, 0x66, 0x6f, 0x20, 0x3d, 0x20, 0x61, 0x73, 0x79, 0x6e, - 0x63, 0x20, 0x28, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, - 0x69, 0x66, 0x20, 0x28, 0x21, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, - 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, - 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x67, 0x65, 0x6e, 0x65, - 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, - 0x6e, 0x67, 0x73, 0x20, 0x3d, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, - 0x66, 0x65, 0x74, 0x63, 0x68, 0x28, 0x22, 0x2f, 0x6d, 0x6f, 0x64, 0x65, - 0x6c, 0x2e, 0x6a, 0x73, 0x6f, 0x6e, 0x22, 0x29, 0x2e, 0x74, 0x68, 0x65, - 0x6e, 0x28, 0x72, 0x20, 0x3d, 0x3e, 
0x20, 0x72, 0x2e, 0x6a, 0x73, 0x6f, - 0x6e, 0x28, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, - 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x67, 0x65, 0x6e, 0x65, 0x72, + 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x2e, + 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20, + 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x2f, 0x2f, 0x0a, 0x65, 0x78, 0x70, + 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, + 0x61, 0x6d, 0x61, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, + 0x65, 0x74, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, + 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20, 0x3d, 0x20, 0x7b, + 0x7d, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x20, 0x3d, 0x20, + 0x7b, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x63, + 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, + 0x72, 0x67, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x6e, 0x65, 0x77, 0x20, 0x45, + 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x28, 0x29, + 0x3b, 0x0a, 0x20, 0x20, 0x28, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, + 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x6c, + 0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d, + 0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6f, 0x72, + 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x28, 0x63, 0x6f, 0x6e, 0x73, + 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, 0x66, 0x20, 0x6c, + 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, + 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, + 0x66, 0x69, 0x67, 0x29, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, + 0x64, 0x61, 0x74, 0x61, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, + 0x2b, 0x3d, 0x20, 0x63, 0x68, 0x75, 
0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, + 0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b, 0x0a, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, + 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, + 0x74, 0x63, 0x68, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, + 0x20, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, + 0x28, 0x22, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x2c, 0x20, + 0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, + 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x20, 0x7d, 0x29, 0x29, + 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x68, 0x75, 0x6e, + 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, - 0x67, 0x73, 0x3b, 0x0a, 0x7d, 0x0a + 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, + 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, 0x45, 0x76, + 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, 0x73, 0x74, + 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x67, 0x65, 0x6e, + 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, + 0x69, 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74, + 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, + 0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, + 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, + 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, + 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, + 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74, 0x69, + 0x6d, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, + 
0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, + 0x72, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, + 0x68, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, + 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, + 0x74, 0x69, 0x6d, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20, 0x7b, 0x20, + 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, 0x75, 0x6e, + 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74, 0x69, 0x6d, 0x69, 0x6e, + 0x67, 0x73, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, + 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, + 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, 0x45, 0x76, + 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, 0x73, 0x74, + 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x64, 0x6f, 0x6e, + 0x65, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, + 0x3a, 0x20, 0x7b, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, + 0x7d, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x7d, 0x29, 0x28, + 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, + 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x3b, + 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x6c, 0x20, + 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, + 0x6e, 0x20, 0x61, 0x20, 0x70, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x20, + 0x74, 0x68, 0x61, 0x74, 0x20, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, 0x65, + 0x73, 0x20, 0x74, 0x6f, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x6f, 0x6d, + 0x70, 0x6c, 0x65, 0x74, 0x65, 0x64, 0x20, 0x74, 0x65, 0x78, 0x74, 0x2e, + 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x64, 0x6f, 0x65, 0x73, 0x20, 0x6e, + 0x6f, 0x74, 0x20, 0x73, 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x73, + 0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, 0x67, 0x0a, 0x2f, 0x2f, 0x0a, + 
0x2f, 0x2f, 0x20, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x3a, 0x0a, + 0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x6c, + 0x61, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x70, + 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x2e, 0x74, 0x68, 0x65, 0x6e, 0x28, + 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x20, 0x3d, 0x3e, + 0x20, 0x7b, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69, + 0x74, 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a, + 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x2f, 0x2f, + 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6f, 0x72, 0x0a, 0x2f, + 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, + 0x73, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d, + 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, + 0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x70, 0x72, 0x6f, 0x6d, + 0x70, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, + 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69, 0x74, + 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a, 0x2f, + 0x2f, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, + 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x6d, + 0x69, 0x73, 0x65, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, + 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20, 0x3d, 0x20, + 0x7b, 0x7d, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x20, 0x3d, + 0x20, 0x7b, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, + 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x6e, 0x65, 0x77, 0x20, 0x50, + 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x61, 0x73, 0x79, 0x6e, 0x63, + 0x20, 0x28, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, 0x65, 0x2c, 0x20, 0x72, + 0x65, 0x6a, 0x65, 0x63, 0x74, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, + 
0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, + 0x65, 0x6e, 0x74, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20, 0x20, + 0x20, 0x20, 0x74, 0x72, 0x79, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, + 0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, + 0x20, 0x6f, 0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x72, + 0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, + 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x29, 0x29, 0x20, 0x7b, + 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, + 0x74, 0x65, 0x6e, 0x74, 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x68, 0x75, 0x6e, + 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, + 0x6e, 0x74, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, + 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x3b, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x7d, 0x20, 0x63, 0x61, 0x74, 0x63, 0x68, 0x20, + 0x28, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x65, + 0x72, 0x72, 0x6f, 0x72, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, + 0x0a, 0x20, 0x20, 0x7d, 0x29, 0x3b, 0x0a, 0x7d, 0x3b, 0x0a, 0x0a, 0x2f, + 0x2a, 0x2a, 0x0a, 0x20, 0x2a, 0x20, 0x28, 0x64, 0x65, 0x70, 0x72, 0x65, + 0x63, 0x61, 0x74, 0x65, 0x64, 0x29, 0x0a, 0x20, 0x2a, 0x2f, 0x0a, 0x65, + 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, + 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, + 0x65, 0x20, 0x3d, 0x20, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, 0x70, + 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72, + 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2c, 0x20, 0x63, 0x61, 0x6c, 0x6c, 0x62, + 0x61, 0x63, 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, + 
0x66, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x28, 0x63, + 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, + 0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x61, 0x72, 0x61, + 0x6d, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70, + 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x7b, 0x20, 0x63, 0x6f, 0x6e, + 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x20, 0x7d, 0x29, 0x29, 0x20, + 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x61, 0x6c, 0x6c, 0x62, 0x61, + 0x63, 0x6b, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x3b, 0x0a, 0x20, + 0x20, 0x7d, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x47, 0x65, 0x74, + 0x20, 0x74, 0x68, 0x65, 0x20, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x20, 0x69, + 0x6e, 0x66, 0x6f, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x74, 0x68, 0x65, + 0x20, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x20, 0x54, 0x68, 0x69, + 0x73, 0x20, 0x69, 0x73, 0x20, 0x75, 0x73, 0x65, 0x66, 0x75, 0x6c, 0x20, + 0x66, 0x6f, 0x72, 0x20, 0x67, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x20, + 0x74, 0x68, 0x65, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x20, + 0x77, 0x69, 0x6e, 0x64, 0x6f, 0x77, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x73, + 0x6f, 0x20, 0x6f, 0x6e, 0x2e, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, + 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, + 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x49, 0x6e, 0x66, 0x6f, 0x20, 0x3d, 0x20, + 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, 0x29, 0x20, 0x3d, 0x3e, 0x20, + 0x7b, 0x0a, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x21, 0x67, 0x65, 0x6e, + 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, + 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, + 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x70, 0x72, 0x6f, 0x70, 0x73, 0x20, + 0x3d, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x66, 0x65, 0x74, 0x63, + 0x68, 0x28, 0x22, 0x2f, 0x70, 0x72, 0x6f, 0x70, 0x73, 0x22, 0x29, 0x2e, + 0x74, 0x68, 0x65, 0x6e, 0x28, 0x72, 0x20, 0x3d, 0x3e, 0x20, 0x72, 0x2e, + 
0x6a, 0x73, 0x6f, 0x6e, 0x28, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, + 0x20, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, + 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x3d, 0x20, 0x70, + 0x72, 0x6f, 0x70, 0x73, 0x2e, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, + 0x5f, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, + 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x3b, 0x0a, 0x20, 0x20, + 0x7d, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x67, + 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, + 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x3b, 0x0a, 0x7d, 0x0a }; -unsigned int completion_js_len = 5346; +unsigned int completion_js_len = 5782; diff --git a/examples/server/public/completion.js b/examples/server/public/completion.js index baaec1d60..ab38a7b40 100644 --- a/examples/server/public/completion.js +++ b/examples/server/public/completion.js @@ -195,7 +195,8 @@ export const llamaComplete = async (params, controller, callback) => { // Get the model info from the server. This is useful for getting the context window and so on. 
export const llamaModelInfo = async () => { if (!generation_settings) { - generation_settings = await fetch("/model.json").then(r => r.json()); + const props = await fetch("/props").then(r => r.json()); + generation_settings = props.default_generation_settings; } return generation_settings; } diff --git a/examples/server/server.cpp b/examples/server/server.cpp index d86d7e04a..9481ce6b1 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -990,11 +990,6 @@ struct llama_server_context queue_results.send(res); } - json get_model_props() - { - return get_formated_generation(slots[0]); - } - json get_formated_generation(llama_client_slot &slot) { const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(model)); @@ -2895,12 +2890,6 @@ int main(int argc, char **argv) } }); - svr.Get("/model.json", [&llama](const httplib::Request &, httplib::Response &res) - { - const json data = llama.get_model_props(); - return res.set_content(data.dump(), "application/json; charset=utf-8"); - }); - svr.Options(R"(/.*)", [](const httplib::Request &, httplib::Response &res) { return res.set_content("", "application/json; charset=utf-8"); }); From f68664ac241a6b5c233d8f1051eef20929b06008 Mon Sep 17 00:00:00 2001 From: Sang-Kil Park Date: Wed, 7 Feb 2024 13:28:00 +0900 Subject: [PATCH 45/94] convert : fix TypeError on GPT-2 vocab.json (#5288) --- convert.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/convert.py b/convert.py index 4a2847a27..323e8058d 100755 --- a/convert.py +++ b/convert.py @@ -334,9 +334,9 @@ class Params: class BpeVocab: def __init__(self, fname_tokenizer: Path, fname_added_tokens: Path | None) -> None: self.bpe_tokenizer = json.loads(open(str(fname_tokenizer), encoding="utf-8").read()) - try: + if isinstance(self.bpe_tokenizer.get('model'), dict): self.vocab = self.bpe_tokenizer["model"]["vocab"] - except KeyError: + else: self.vocab = self.bpe_tokenizer added_tokens: dict[str, int] if fname_added_tokens is not 
None: From f3e2b4fa3f81a410ecb7dec929c259ef8d8dbb7d Mon Sep 17 00:00:00 2001 From: Justin Parker Date: Wed, 7 Feb 2024 01:15:19 -0500 Subject: [PATCH 46/94] server : update `/props` with "total_slots" value (#5373) * include total "num_slots" in default_generation_settings_for_props * cleanup total_slots return value in /props endpoint * update /props endpoint docs with total_slots * remove num_slots from default_generation_settings_for_props * update /props endpoint section --- examples/server/README.md | 4 +++- examples/server/server.cpp | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/examples/server/README.md b/examples/server/README.md index 46d8f85ae..1db7cdf21 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -276,13 +276,15 @@ Notice that each `probs` is an array of length `n_probs`. { "assistant_name": "", "user_name": "", - "default_generation_settings": { ... } + "default_generation_settings": { ... }, + "total_slots": 1 } ``` - `assistant_name` - the required assistant name to generate the prompt in case you have specified a system prompt for all slots. - `user_name` - the required anti-prompt to generate the prompt in case you have specified a system prompt for all slots. - `default_generation_settings` - the default generation settings for the `/completion` endpoint, has the same fields as the `generation_settings` response object from the `/completion` endpoint. +- `total_slots` - the total number of slots for process requests (defined by `--parallel` option) - **POST** `/v1/chat/completions`: OpenAI-compatible Chat Completions API. Given a ChatML-formatted json description in `messages`, it returns the predicted completion. Both synchronous and streaming mode are supported, so scripted and interactive applications work fine. While no strong claims of compatibility with OpenAI API spec is being made, in our experience it suffices to support many apps. 
Only ChatML-tuned models, such as Dolphin, OpenOrca, OpenHermes, OpenChat-3.5, etc can be used with this endpoint. Compared to `api_like_OAI.py` this API implementation does not require a wrapper to be served. diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 9481ce6b1..eceda30d0 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -432,7 +432,6 @@ struct llama_server_context } default_generation_settings_for_props = get_formated_generation(slots.front()); - default_generation_settings_for_props["num_slots"] = params.n_parallel; default_generation_settings_for_props["seed"] = -1; batch = llama_batch_init(n_ctx, 0, params.n_parallel); @@ -2639,7 +2638,8 @@ int main(int argc, char **argv) json data = { { "user_name", llama.name_user.c_str() }, { "assistant_name", llama.name_assistant.c_str() }, - { "default_generation_settings", llama.default_generation_settings_for_props } + { "default_generation_settings", llama.default_generation_settings_for_props }, + { "total_slots", llama.params.n_parallel } }; res.set_content(data.dump(), "application/json; charset=utf-8"); }); From 316c7faf7740fa98ea68f1445f4505810f706b9e Mon Sep 17 00:00:00 2001 From: runfuture Date: Wed, 7 Feb 2024 14:15:56 +0800 Subject: [PATCH 47/94] llama : add MiniCPM support (#5346) * support minicpm arch. * fix tab/space typo. 
* convert minicpm model via convert-hf-gguf.py * try to make tokenizer work * fix bug for quantize minicpm * fix for flake8 lint * remove convert-minicpm.py * fix for editorconfig * correct minicpm model type (size) * constants expanded for minicpm * Minor change of the constant names for minicpm --- convert-hf-to-gguf.py | 49 ++++++++++ gguf-py/gguf/constants.py | 21 +++++ llama.cpp | 190 +++++++++++++++++++++++++++++++++++++- 3 files changed, 259 insertions(+), 1 deletion(-) diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py index 5e343742d..829d68368 100755 --- a/convert-hf-to-gguf.py +++ b/convert-hf-to-gguf.py @@ -22,6 +22,8 @@ if 'NO_LOCAL_GGUF' not in os.environ: sys.path.insert(1, str(Path(__file__).parent / 'gguf-py')) import gguf +from convert import HfVocab + # check for any of the given keys in the dictionary and return the value of the first key found def get_key_opts(d, keys): @@ -205,6 +207,8 @@ class Model: return OrionModel if model_architecture == "InternLM2ForCausalLM": return InternLM2Model + if model_architecture == "MiniCPMForCausalLM": + return MiniCPMModel return Model def _is_model_safetensors(self) -> bool: @@ -258,6 +262,8 @@ class Model: return gguf.MODEL_ARCH.ORION if arch == "InternLM2ForCausalLM": return gguf.MODEL_ARCH.INTERNLM2 + if arch == "MiniCPMForCausalLM": + return gguf.MODEL_ARCH.MINICPM raise NotImplementedError(f'Architecture "{arch}" not supported!') @@ -402,6 +408,31 @@ class Model: special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens)) special_vocab.add_to_gguf(self.gguf_writer) + def _set_vocab_hf(self): + path = self.dir_model + added_tokens_path = self.dir_model + vocab = HfVocab( + path, added_tokens_path if added_tokens_path.exists() else None + ) + tokens = [] + scores = [] + toktypes = [] + + for text, score, toktype in vocab.all_tokens(): + tokens.append(text) + scores.append(score) + toktypes.append(toktype) + + assert len(tokens) == vocab.vocab_size + + 
self.gguf_writer.add_tokenizer_model("llama") + self.gguf_writer.add_token_list(tokens) + self.gguf_writer.add_token_scores(scores) + self.gguf_writer.add_token_types(toktypes) + + special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens)) + special_vocab.add_to_gguf(self.gguf_writer) + class GPTNeoXModel(Model): def set_gguf_parameters(self): @@ -1041,6 +1072,24 @@ class MixtralModel(Model): self._set_vocab_sentencepiece() +class MiniCPMModel(Model): + def set_gguf_parameters(self): + block_count = self.hparams["num_hidden_layers"] + self.gguf_writer.add_name("MiniCPM") + self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"]) + self.gguf_writer.add_embedding_length(self.hparams["hidden_size"]) + self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"]) + self.gguf_writer.add_block_count(block_count) + self.gguf_writer.add_head_count(self.hparams["num_attention_heads"]) + self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"]) + self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"]) + self.gguf_writer.add_file_type(self.ftype) + self.gguf_writer.add_rope_dimension_count(self.hparams["hidden_size"] // self.hparams["num_attention_heads"]) + + def set_vocab(self): + self._set_vocab_hf() + + class QwenModel(Model): @staticmethod def token_bytes_to_string(b): diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index ed8e26f83..1cfd41c0b 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -104,6 +104,7 @@ class MODEL_ARCH(IntEnum): CODESHELL = auto() ORION = auto() INTERNLM2 = auto() + MINICPM = auto() class MODEL_TENSOR(IntEnum): @@ -156,6 +157,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = { MODEL_ARCH.CODESHELL: "codeshell", MODEL_ARCH.ORION: "orion", MODEL_ARCH.INTERNLM2: "internlm2", + MODEL_ARCH.MINICPM: "minicpm", } TENSOR_NAMES: dict[MODEL_TENSOR, str] = { @@ -464,6 +466,25 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = { 
MODEL_TENSOR.FFN_DOWN, MODEL_TENSOR.FFN_UP, ], + MODEL_ARCH.MINICPM: [ + MODEL_TENSOR.TOKEN_EMBD, + MODEL_TENSOR.OUTPUT_NORM, + MODEL_TENSOR.ROPE_FREQS, + MODEL_TENSOR.ATTN_NORM, + MODEL_TENSOR.ATTN_Q, + MODEL_TENSOR.ATTN_K, + MODEL_TENSOR.ATTN_V, + MODEL_TENSOR.ATTN_OUT, + MODEL_TENSOR.ATTN_ROT_EMBD, + MODEL_TENSOR.FFN_GATE_INP, + MODEL_TENSOR.FFN_NORM, + MODEL_TENSOR.FFN_GATE, + MODEL_TENSOR.FFN_DOWN, + MODEL_TENSOR.FFN_UP, + MODEL_TENSOR.FFN_GATE_EXP, + MODEL_TENSOR.FFN_DOWN_EXP, + MODEL_TENSOR.FFN_UP_EXP, + ], # TODO } diff --git a/llama.cpp b/llama.cpp index 65e399adc..f3c5146d1 100644 --- a/llama.cpp +++ b/llama.cpp @@ -205,6 +205,7 @@ enum llm_arch { LLM_ARCH_CODESHELL, LLM_ARCH_ORION, LLM_ARCH_INTERNLM2, + LLM_ARCH_MINICPM, LLM_ARCH_UNKNOWN, }; @@ -228,6 +229,7 @@ static std::map LLM_ARCH_NAMES = { { LLM_ARCH_CODESHELL, "codeshell" }, { LLM_ARCH_ORION, "orion" }, { LLM_ARCH_INTERNLM2, "internlm2" }, + { LLM_ARCH_MINICPM, "minicpm" }, }; enum llm_kv { @@ -690,6 +692,29 @@ static std::map> LLM_TENSOR_NAMES = { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" }, }, }, + { + LLM_ARCH_MINICPM, + { + { LLM_TENSOR_TOKEN_EMBD, "token_embd" }, + { LLM_TENSOR_OUTPUT_NORM, "output_norm" }, + { LLM_TENSOR_OUTPUT, "output" }, + { LLM_TENSOR_ROPE_FREQS, "rope_freqs" }, + { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" }, + { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" }, + { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" }, + { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" }, + { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" }, + { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" }, + { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" }, + { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" }, + { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" }, + { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" }, + { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" }, + { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" }, + { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" }, + { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" }, + }, + }, { LLM_ARCH_UNKNOWN, { @@ -1390,6 +1415,7 @@ enum 
e_model { MODEL_UNKNOWN, MODEL_0_5B, MODEL_1B, + MODEL_2B, MODEL_3B, MODEL_4B, MODEL_7B, @@ -2748,6 +2774,7 @@ static std::string llama_model_ftype_name(llama_ftype ftype) { static const char * llama_model_type_name(e_model type) { switch (type) { case MODEL_1B: return "1B"; + case MODEL_2B: return "2B"; case MODEL_3B: return "3B"; case MODEL_7B: return "7B"; case MODEL_8B: return "8B"; @@ -2887,6 +2914,13 @@ static void llm_load_hparams( default: model.type = e_model::MODEL_UNKNOWN; } } break; + case LLM_ARCH_MINICPM: + { + switch (hparams.n_layer) { + case 40: model.type = e_model::MODEL_2B; break; + default: model.type = e_model::MODEL_UNKNOWN; + } + } break; case LLM_ARCH_FALCON: { ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps); @@ -3524,13 +3558,16 @@ static bool llm_load_tensors( switch (model.arch) { case LLM_ARCH_LLAMA: case LLM_ARCH_REFACT: + case LLM_ARCH_MINICPM: { model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}); // output { model.output_norm = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}); - model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}); + if (model.arch != LLM_ARCH_MINICPM){ + model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}); + } } for (int i = 0; i < n_layer; ++i) { @@ -6781,6 +6818,153 @@ struct llm_build_context { return gf; } + // ref: https://arxiv.org/abs/2203.03466 + // https://github.com/ggerganov/llama.cpp/issues/5276#issuecomment-1925774738 + // based on the original build_llama() function + struct ggml_cgraph * build_minicpm() { + struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false); + + const int64_t n_embd_head = hparams.n_embd_head_v; + GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); + GGML_ASSERT(n_embd_head == hparams.n_rot); + + const int64_t n_embd = hparams.n_embd; + //TODO: if the model varies, these 
parameters need to be read from the model + const int64_t n_embd_base = 256; + const float scale_embd = 12.0f; + const float scale_depth = 1.4f; + + struct ggml_tensor * cur; + struct ggml_tensor * inpL; + + inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); + cb(inpL, "inp_embd", -1); + + // scale the input embeddings + inpL = ggml_scale(ctx0, inpL, scale_embd); + cb(inpL, "inp_scaled", -1); + + // inp_pos - contains the positions + struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0); + cb(inp_pos, "inp_pos", -1); + + // KQ_mask (mask for 1 head, it will be broadcasted to all heads) + struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0); + cb(KQ_mask, "KQ_mask", -1); + + // shift the entire K-cache if needed + if (do_rope_shift) { + llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE, n_ctx, freq_base, freq_scale, cb); + } + + for (int il = 0; il < n_layer; ++il) { + struct ggml_tensor * inpSA = inpL; + + // norm + cur = llm_build_norm(ctx0, inpL, hparams, + model.layers[il].attn_norm, NULL, + LLM_NORM_RMS, cb, il); + cb(cur, "attn_norm", il); + + // self-attention + { + // compute Q and K and RoPE them + struct ggml_tensor * Qcur = ggml_mul_mat(ctx0, model.layers[il].wq, cur); + cb(Qcur, "Qcur", il); + if (model.layers[il].bq) { + Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq); + cb(Qcur, "Qcur", il); + } + + struct ggml_tensor * Kcur = ggml_mul_mat(ctx0, model.layers[il].wk, cur); + cb(Kcur, "Kcur", il); + if (model.layers[il].bk) { + Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk); + cb(Kcur, "Kcur", il); + } + + struct ggml_tensor * Vcur = ggml_mul_mat(ctx0, model.layers[il].wv, cur); + cb(Vcur, "Vcur", il); + if (model.layers[il].bv) { + Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv); + cb(Vcur, "Vcur", il); + } + + Qcur = ggml_rope_custom( + ctx0, ggml_reshape_3d(ctx0, 
Qcur, n_embd_head, n_head, n_tokens), inp_pos, + hparams.n_rot, 0, 0, n_orig_ctx, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + cb(Qcur, "Qcur", il); + + Kcur = ggml_rope_custom( + ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos, + hparams.n_rot, 0, 0, n_orig_ctx, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + cb(Kcur, "Kcur", il); + + cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, + model.layers[il].wo, model.layers[il].bo, + Kcur, Vcur, Qcur, KQ_mask, n_ctx, n_tokens, kv_head, n_kv, -1.0f, 1.0f/sqrtf(float(n_embd_head)), cb, il); + cb(cur, "kqv_out", il); + } + + // scale_res - scale the hidden states for residual connection + const float scale_res = scale_depth/sqrtf(float(n_layer)); + cur = ggml_scale(ctx0, cur, scale_res); + cb(cur, "hidden_scaled", -1); + + struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); + cb(ffn_inp, "ffn_inp", il); + + // feed-forward network + { + cur = llm_build_norm(ctx0, ffn_inp, hparams, + model.layers[il].ffn_norm, NULL, + LLM_NORM_RMS, cb, il); + cb(cur, "ffn_norm", il); + + cur = llm_build_ffn(ctx0, cur, + model.layers[il].ffn_up, NULL, + model.layers[il].ffn_gate, NULL, + model.layers[il].ffn_down, NULL, + NULL, + LLM_FFN_SILU, LLM_FFN_PAR, cb, il); + cb(cur, "ffn_out", il); + } + + // scale the hidden states for residual connection + cur = ggml_scale(ctx0, cur, scale_res); + cb(cur, "hidden_scaled_ffn", -1); + + cur = ggml_add(ctx0, cur, ffn_inp); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; + } + + cur = inpL; + + cur = llm_build_norm(ctx0, cur, hparams, + model.output_norm, NULL, + LLM_NORM_RMS, cb, -1); + cb(cur, "result_norm", -1); + + // lm_head scaling + const float scale_lmhead = float(n_embd_base)/float(n_embd); + cur = ggml_scale(ctx0, cur, scale_lmhead); + cb(cur, "lmhead_scaling", -1); + + // lm_head + cur = ggml_mul_mat(ctx0, model.tok_embd, cur); + cb(cur, "result_output", -1); + + 
ggml_build_forward_expand(gf, cur); + + return gf; + } }; static struct ggml_cgraph * llama_build_graph( @@ -6943,6 +7127,10 @@ static struct ggml_cgraph * llama_build_graph( { result = llm.build_internlm2(); } break; + case LLM_ARCH_MINICPM: + { + result = llm.build_minicpm(); + } break; default: GGML_ASSERT(false); } From 9a697d842bc0cfce8268ebd2ba703ffc1c904f98 Mon Sep 17 00:00:00 2001 From: Ben Williams Date: Tue, 6 Feb 2024 22:16:48 -0800 Subject: [PATCH 48/94] readme : update ui list (#5354) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 34f2021f9..672512d18 100644 --- a/README.md +++ b/README.md @@ -149,6 +149,7 @@ Unless otherwise noted these projects are open-source with permissive licensing: - [iohub/collama](https://github.com/iohub/coLLaMA) - [janhq/jan](https://github.com/janhq/jan) (AGPL) - [nat/openplayground](https://github.com/nat/openplayground) +- [Faraday](https://faraday.dev/) (proprietary) - [LMStudio](https://lmstudio.ai/) (proprietary) - [LostRuins/koboldcpp](https://github.com/LostRuins/koboldcpp) (AGPL) - [Mozilla-Ocho/llamafile](https://github.com/Mozilla-Ocho/llamafile) From ed0bf32290ee5b30ffad5becd99cbecef74aedd7 Mon Sep 17 00:00:00 2001 From: Eve <139727413+netrunnereve@users.noreply.github.com> Date: Wed, 7 Feb 2024 06:21:30 +0000 Subject: [PATCH 49/94] readme : modernize (#5379) * first cleanup, update everything to Llama 2 and remove outdated content * Delete SHA256SUMS * make build instructions generic * recommend Q4_K_M quantization method * Update README.md --- README.md | 127 +++++++++++++++-------------------------------------- SHA256SUMS | 40 ----------------- 2 files changed, 36 insertions(+), 131 deletions(-) delete mode 100644 SHA256SUMS diff --git a/README.md b/README.md index 672512d18..0509b0ba1 100644 --- a/README.md +++ b/README.md @@ -33,17 +33,14 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others)
  • Get the Code
  • Build
  • BLAS Build
  • -
  • Prepare Data & Run
  • +
  • Prepare and Quantize
  • +
  • Run the quantized model
  • Memory/Disk Requirements
  • Quantization
  • Interactive mode
  • Constrained output with grammars
  • -
  • Instruction mode with Alpaca
  • -
  • Using OpenLLaMA
  • -
  • Using GPT4All
  • -
  • Using Pygmalion 7B & Metharme 7B
  • -
  • Obtaining the Facebook LLaMA original model and Stanford Alpaca model data
  • -
  • Verifying the model files
  • +
  • Instruct mode
  • +
  • Obtaining and using the Facebook LLaMA 2 model
  • Seminal papers and background on the models
  • Perplexity (measuring model quality)
  • Android
  • @@ -83,20 +80,16 @@ improved significantly thanks to many contributions. It is the main playground f **Supported models:** +Typically finetunes of the base models below are supported as well. + - [X] LLaMA 🦙 - [x] LLaMA 2 🦙🦙 -- [X] [Mistral AI v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) +- [X] [Mistral 7B](https://huggingface.co/mistralai/Mistral-7B-v0.1) - [x] [Mixtral MoE](https://huggingface.co/models?search=mistral-ai/Mixtral) - [X] Falcon -- [X] [Alpaca](https://github.com/ggerganov/llama.cpp#instruction-mode-with-alpaca) -- [X] [GPT4All](https://github.com/ggerganov/llama.cpp#using-gpt4all) - [X] [Chinese LLaMA / Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca) and [Chinese LLaMA-2 / Alpaca-2](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2) - [X] [Vigogne (French)](https://github.com/bofenghuang/vigogne) -- [X] [Vicuna](https://github.com/ggerganov/llama.cpp/discussions/643#discussioncomment-5533894) - [X] [Koala](https://bair.berkeley.edu/blog/2023/04/03/koala/) -- [X] [OpenBuddy 🐶 (Multilingual)](https://github.com/OpenBuddy/OpenBuddy) -- [X] [Pygmalion/Metharme](#using-pygmalion-7b--metharme-7b) -- [X] [WizardLM](https://github.com/nlpxucan/WizardLM) - [X] [Baichuan 1 & 2](https://huggingface.co/models?search=baichuan-inc/Baichuan) + [derivations](https://huggingface.co/hiyouga/baichuan-7b-sft) - [X] [Aquila 1 & 2](https://huggingface.co/models?search=BAAI/Aquila) - [X] [Starcoder models](https://github.com/ggerganov/llama.cpp/pull/3187) @@ -166,7 +159,7 @@ Unless otherwise noted these projects are open-source with permissive licensing: Here is a typical run using LLaMA v2 13B on M2 Ultra: -```java +``` $ make -j && ./main -m models/llama-13b-v2/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:\nStep 1:" -n 400 -e I llama.cpp build info: I UNAME_S: Darwin @@ -250,7 +243,7 @@ https://user-images.githubusercontent.com/1991296/224442907-7693d4be-acaa-4e01-8 ## Usage -Here are the end-to-end binary build and 
model conversion steps for the LLaMA-7B model. +Here are the end-to-end binary build and model conversion steps for most supported models. ### Get the Code @@ -635,7 +628,7 @@ Building the program with BLAS support may lead to some performance improvements **Without docker**: - Firstly, you need to make sure you installed [Vulkan SDK](https://vulkan.lunarg.com/doc/view/latest/linux/getting_started_ubuntu.html) + Firstly, you need to make sure you have installed [Vulkan SDK](https://vulkan.lunarg.com/doc/view/latest/linux/getting_started_ubuntu.html) For example, on Ubuntu 22.04 (jammy), use the command below: @@ -648,6 +641,8 @@ Building the program with BLAS support may lead to some performance improvements vulkaninfo ``` + Alternatively your package manager might be able to provide the appropiate libraries. For example for Ubuntu 22.04 you can install `libvulkan-dev` instead. + Then, build llama.cpp using the cmake command below: ```bash @@ -662,34 +657,42 @@ Building the program with BLAS support may lead to some performance improvements # ggml_vulkan: Using Intel(R) Graphics (ADL GT2) | uma: 1 | fp16: 1 | warp size: 32 ``` -### Prepare Data & Run +### Prepare and Quantize + +To obtain the official LLaMA 2 weights please see the Obtaining and using the Facebook LLaMA 2 model section. There is also a large selection of pre-quantized `gguf` models available on Hugging Face. 
```bash -# obtain the original LLaMA model weights and place them in ./models +# obtain the official LLaMA model weights and place them in ./models ls ./models -65B 30B 13B 7B tokenizer_checklist.chk tokenizer.model +llama-2-7b tokenizer_checklist.chk tokenizer.model # [Optional] for models using BPE tokenizers ls ./models -65B 30B 13B 7B vocab.json + vocab.json +# [Optional] for PyTorch .bin models like Mistral-7B +ls ./models + # install Python dependencies python3 -m pip install -r requirements.txt -# convert the 7B model to ggml FP16 format -python3 convert.py models/7B/ +# convert the model to ggml FP16 format +python3 convert.py models/mymodel/ # [Optional] for models using BPE tokenizers -python convert.py models/7B/ --vocabtype bpe +python convert.py models/mymodel/ --vocabtype bpe -# quantize the model to 4-bits (using q4_0 method) -./quantize ./models/7B/ggml-model-f16.gguf ./models/7B/ggml-model-q4_0.gguf q4_0 +# quantize the model to 4-bits (using Q4_K_M method) +./quantize ./models/mymodel/ggml-model-f16.gguf ./models/mymodel/ggml-model-Q4_K_M.gguf Q4_K_M -# update the gguf filetype to current if older version is unsupported by another application -./quantize ./models/7B/ggml-model-q4_0.gguf ./models/7B/ggml-model-q4_0-v2.gguf COPY +# update the gguf filetype to current version if older version is now unsupported +./quantize ./models/mymodel/ggml-model-Q4_K_M.gguf ./models/mymodel/ggml-model-Q4_K_M-v2.gguf COPY +``` +### Run the quantized model -# run the inference -./main -m ./models/7B/ggml-model-q4_0.gguf -n 128 +```bash +# start inference on a gguf model +./main -m ./models/mymodel/ggml-model-Q4_K_M.gguf -n 128 ``` When running the larger models, make sure you have enough disk space to store all the intermediate files. 
@@ -710,7 +713,7 @@ From the unzipped folder, open a terminal/cmd window here and place a pre-conver As the models are currently fully loaded into memory, you will need adequate disk space to save them and sufficient RAM to load them. At the moment, memory and disk requirements are the same. -| Model | Original size | Quantized size (4-bit) | +| Model | Original size | Quantized size (Q4_0) | |------:|--------------:|-----------------------:| | 7B | 13 GB | 3.9 GB | | 13B | 24 GB | 7.8 GB | @@ -826,9 +829,9 @@ The `grammars/` folder contains a handful of sample grammars. To write your own, For authoring more complex JSON grammars, you can also check out https://grammar.intrinsiclabs.ai/, a browser app that lets you write TypeScript interfaces which it compiles to GBNF grammars that you can save for local use. Note that the app is built and maintained by members of the community, please file any issues or FRs on [its repo](http://github.com/intrinsiclabsai/gbnfgen) and not this one. -### Instruction mode with Alpaca +### Instruct mode -1. First, download the `ggml` Alpaca model into the `./models` folder +1. First, download and place the `ggml` model into the `./models` folder 2. Run the `main` tool like this: ``` @@ -854,50 +857,6 @@ cadaver, cauliflower, cabbage (vegetable), catalpa (tree) and Cailleach. > ``` -### Using [OpenLLaMA](https://github.com/openlm-research/open_llama) - -OpenLLaMA is an openly licensed reproduction of Meta's original LLaMA model. It uses the same architecture and is a drop-in replacement for the original LLaMA weights. - -- Download the [3B](https://huggingface.co/openlm-research/open_llama_3b), [7B](https://huggingface.co/openlm-research/open_llama_7b), or [13B](https://huggingface.co/openlm-research/open_llama_13b) model from Hugging Face. 
-- Convert the model to ggml FP16 format using `python convert.py ` - -### Using [GPT4All](https://github.com/nomic-ai/gpt4all) - -*Note: these instructions are likely obsoleted by the GGUF update* - -- Obtain the `tokenizer.model` file from LLaMA model and put it to `models` -- Obtain the `added_tokens.json` file from Alpaca model and put it to `models` -- Obtain the `gpt4all-lora-quantized.bin` file from GPT4All model and put it to `models/gpt4all-7B` -- It is distributed in the old `ggml` format which is now obsoleted -- You have to convert it to the new format using `convert.py`: - -```bash -python3 convert.py models/gpt4all-7B/gpt4all-lora-quantized.bin -``` - -- You can now use the newly generated `models/gpt4all-7B/ggml-model-q4_0.bin` model in exactly the same way as all other models - -- The newer GPT4All-J model is not yet supported! - -### Using Pygmalion 7B & Metharme 7B - -- Obtain the [LLaMA weights](#obtaining-the-facebook-llama-original-model-and-stanford-alpaca-model-data) -- Obtain the [Pygmalion 7B](https://huggingface.co/PygmalionAI/pygmalion-7b/) or [Metharme 7B](https://huggingface.co/PygmalionAI/metharme-7b) XOR encoded weights -- Convert the LLaMA model with [the latest HF convert script](https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/convert_llama_weights_to_hf.py) -- Merge the XOR files with the converted LLaMA weights by running the [xor_codec](https://huggingface.co/PygmalionAI/pygmalion-7b/blob/main/xor_codec.py) script -- Convert to `ggml` format using the `convert.py` script in this repo: -```bash -python3 convert.py pygmalion-7b/ --outtype q4_1 -``` -> The Pygmalion 7B & Metharme 7B weights are saved in [bfloat16](https://en.wikipedia.org/wiki/Bfloat16_floating-point_format) precision. If you wish to convert to `ggml` without quantizating, please specify the `--outtype` as `f32` instead of `f16`. 
- - -### Obtaining the Facebook LLaMA original model and Stanford Alpaca model data - -- **Under no circumstances should IPFS, magnet links, or any other links to model downloads be shared anywhere in this repository, including in issues, discussions, or pull requests. They will be immediately deleted.** -- The LLaMA models are officially distributed by Facebook and will **never** be provided through this repository. -- Refer to [Facebook's LLaMA repository](https://github.com/facebookresearch/llama/pull/73/files) if you need to request access to the model data. - ### Obtaining and using the Facebook LLaMA 2 model - Refer to [Facebook's LLaMA download page](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) if you want to access the model data. @@ -909,20 +868,6 @@ python3 convert.py pygmalion-7b/ --outtype q4_1 - [LLaMA 2 13B chat](https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF) - [LLaMA 2 70B chat](https://huggingface.co/TheBloke/Llama-2-70B-chat-GGUF) -### Verifying the model files - -Please verify the [sha256 checksums](SHA256SUMS) of all downloaded model files to confirm that you have the correct model data files before creating an issue relating to your model files. -- The following python script will verify if you have all possible latest files in your self-installed `./models` subdirectory: - -```bash -# run the verification script -./scripts/verify-checksum-models.py -``` - -- On linux or macOS it is also possible to run the following commands to verify if you have all possible latest files in your self-installed `./models` subdirectory: - - On Linux: `sha256sum --ignore-missing -c SHA256SUMS` - - on macOS: `shasum -a 256 --ignore-missing -c SHA256SUMS` - ### Seminal papers and background on the models If your issue is with model generation quality, then please at least scan the following links and papers to understand the limitations of LLaMA models. 
This is especially important when choosing an appropriate model size and appreciating both the significant and subtle differences between LLaMA models and ChatGPT: diff --git a/SHA256SUMS b/SHA256SUMS deleted file mode 100644 index ca4d5a4a5..000000000 --- a/SHA256SUMS +++ /dev/null @@ -1,40 +0,0 @@ -700df0d3013b703a806d2ae7f1bfb8e59814e3d06ae78be0c66368a50059f33d models/7B/consolidated.00.pth -666a4bb533b303bdaf89e1b6a3b6f93535d868de31d903afdc20983dc526c847 models/7B/ggml-model-f16.bin -ec2f2d1f0dfb73b72a4cbac7fa121abbe04c37ab327125a38248f930c0f09ddf models/7B/ggml-model-q4_0.bin -ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/7B/ggml-model-q4_1.bin -ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/7B/ggml-model-q5_0.bin -ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/7B/ggml-model-q5_1.bin -7e89e242ddc0dd6f060b43ca219ce8b3e8f08959a72cb3c0855df8bb04d46265 models/7B/params.json -745bf4e29a4dd6f411e72976d92b452da1b49168a4f41c951cfcc8051823cf08 models/13B/consolidated.00.pth -d5ccbcc465c71c0de439a5aeffebe8344c68a519bce70bc7f9f92654ee567085 models/13B/consolidated.01.pth -2b206e9b21fb1076f11cafc624e2af97c9e48ea09312a0962153acc20d45f808 models/13B/ggml-model-f16.bin -fad169e6f0f575402cf75945961cb4a8ecd824ba4da6be2af831f320c4348fa5 models/13B/ggml-model-q4_0.bin -ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/13B/ggml-model-q4_1.bin -ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/13B/ggml-model-q5_0.bin -ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/13B/ggml-model-q5_1.bin -4ab77bec4d4405ccb66a97b282574c89a94417e3c32e5f68f37e2876fc21322f models/13B/params.json -e23294a58552d8cdec5b7e8abb87993b97ea6eced4178ff2697c02472539d067 models/30B/consolidated.00.pth -4e077b7136c7ae2302e954860cf64930458d3076fcde9443f4d0e939e95903ff models/30B/consolidated.01.pth -24a87f01028cbd3a12de551dcedb712346c0b5cbdeff1454e0ddf2df9b675378 
models/30B/consolidated.02.pth -1adfcef71420886119544949767f6a56cb6339b4d5fcde755d80fe68b49de93b models/30B/consolidated.03.pth -7e1b524061a9f4b27c22a12d6d2a5bf13b8ebbea73e99f218809351ed9cf7d37 models/30B/ggml-model-f16.bin -d2a441403944819492ec8c2002cc36fa38468149bfb4b7b4c52afc7bd9a7166d models/30B/ggml-model-q4_0.bin -ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/30B/ggml-model-q4_1.bin -ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/30B/ggml-model-q5_0.bin -ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/30B/ggml-model-q5_1.bin -2c07118ea98d69dbe7810d88520e30288fa994751b337f8fca02b171955f44cb models/30B/params.json -135c563f6b3938114458183afb01adc9a63bef3d8ff7cccc3977e5d3664ecafe models/65B/consolidated.00.pth -9a600b37b19d38c7e43809485f70d17d1dc12206c07efa83bc72bb498a568bde models/65B/consolidated.01.pth -e7babf7c5606f165a3756f527cb0fedc4f83e67ef1290391e52fb1cce5f26770 models/65B/consolidated.02.pth -73176ffb426b40482f2aa67ae1217ef79fbbd1fff5482bae5060cdc5a24ab70e models/65B/consolidated.03.pth -882e6431d0b08a8bc66261a0d3607da21cbaeafa96a24e7e59777632dbdac225 models/65B/consolidated.04.pth -a287c0dfe49081626567c7fe87f74cce5831f58e459b427b5e05567641f47b78 models/65B/consolidated.05.pth -72b4eba67a1a3b18cb67a85b70f8f1640caae9b40033ea943fb166bd80a7b36b models/65B/consolidated.06.pth -d27f5b0677d7ff129ceacd73fd461c4d06910ad7787cf217b249948c3f3bc638 models/65B/consolidated.07.pth -60758f2384d74e423dffddfd020ffed9d3bb186ebc54506f9c4a787d0f5367b0 models/65B/ggml-model-f16.bin -cde053439fa4910ae454407e2717cc46cc2c2b4995c00c93297a2b52e790fa92 models/65B/ggml-model-q4_0.bin -ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/65B/ggml-model-q4_1.bin -ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/65B/ggml-model-q5_0.bin -ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/65B/ggml-model-q5_1.bin 
-999ed1659b469ccc2a941714c0a9656fa571d17c9f7c8c7589817ca90edef51b models/65B/params.json -9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 models/tokenizer.model From ee1628bdfea8b0079fed0140ac2f00ef1b465b57 Mon Sep 17 00:00:00 2001 From: 0cc4m Date: Wed, 7 Feb 2024 07:54:50 +0100 Subject: [PATCH 50/94] Basic Vulkan Multi-GPU implementation (#5321) * Initial Vulkan multi-gpu implementation Move most global variables into backend context * Add names to backend device functions * Add further missing cleanup code * Reduce code duplication in tensor split layer assignment * generalize LLAMA_SPLIT_LAYER for all backends, do not expose device count and memory in llama.h * Only do device info print in the beginning and initialize one backend for cpu assist Add missing cleanup code * Rework backend memory management to make sure devices and buffers get properly allocated and freed * Rename cpu assist free function --------- Co-authored-by: slaren --- common/common.cpp | 8 +- ggml-vulkan.cpp | 2639 ++++++++++++++++++++++++++------------------- ggml-vulkan.h | 23 +- ggml.c | 14 +- llama.cpp | 69 +- 5 files changed, 1587 insertions(+), 1166 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 8c1a60583..e0082a823 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -46,6 +46,10 @@ #define GGML_USE_CUBLAS_SYCL #endif +#if (defined(GGML_USE_CUBLAS) || defined(GGML_USE_SYCL)) || defined(GGML_USE_VULKAN) +#define GGML_USE_CUBLAS_SYCL_VULKAN +#endif + int32_t get_num_physical_cores() { #ifdef __linux__ // enumerate the set of thread siblings, num entries is num cores @@ -660,8 +664,8 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { params.tensor_split[i] = 0.0f; } } -#ifndef GGML_USE_CUBLAS_SYCL - fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS/SYCL. 
Setting a tensor split has no effect.\n"); +#ifndef GGML_USE_CUBLAS_SYCL_VULKAN + fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS/SYCL/Vulkan. Setting a tensor split has no effect.\n"); #endif // GGML_USE_CUBLAS_SYCL } else if (arg == "--no-mmap") { params.use_mmap = false; diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp index 14fb89e09..9e2846ee4 100644 --- a/ggml-vulkan.cpp +++ b/ggml-vulkan.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include "ggml.h" #include "ggml-backend-impl.h" @@ -37,6 +38,8 @@ #define GGML_VK_MAX_NODES 8192 +#define MAX_VK_BUFFERS 256 + #ifndef K_QUANTS_PER_ITERATION #define K_QUANTS_PER_ITERATION 1 #else @@ -53,15 +56,68 @@ static_assert(K_QUANTS_PER_ITERATION == 1 || K_QUANTS_PER_ITERATION == 2, "K_QUA } \ } while (0) -struct vk_buffer { +struct ggml_backend_vk_context; + +struct vk_queue { + uint32_t queue_family_index; + vk::Queue queue; + vk::CommandPool pool; + uint32_t cmd_buffer_idx; + std::vector cmd_buffers; + + vk::PipelineStageFlags stage_flags; +}; + +struct vk_device { + vk::PhysicalDevice physical_device; + vk::PhysicalDeviceProperties properties; + std::string name; + uint64_t max_memory_allocation_size; + bool fp16; + vk::Device device; + uint32_t vendor_id; + vk_queue compute_queue; + vk_queue transfer_queue; + bool single_queue; + uint32_t descriptor_set_mode; + uint32_t subgroup_size; + bool uma; + + ~vk_device() { +#ifdef GGML_VULKAN_DEBUG + std::cerr << "destroy device " << name << std::endl; +#endif + device.destroy(); + } +}; + +struct vk_buffer_struct { vk::Buffer buffer; vk::DeviceMemory device_memory; vk::MemoryPropertyFlags memory_property_flags; void * ptr; size_t size = 0; - uint32_t qf_owner; + + ggml_backend_vk_context * ctx; + + std::shared_ptr device; + + ~vk_buffer_struct() { + if (size == 0) { + return; + } +#ifdef GGML_VULKAN_DEBUG + std::cerr << "~vk_buffer_struct(" << buffer << ", " << size << ")" << std::endl; +#endif + + device->device.freeMemory(device_memory); + 
device->device.destroyBuffer(buffer); + } }; +typedef std::shared_ptr vk_buffer; +typedef std::weak_ptr vk_buffer_ref; + struct vk_subbuffer { vk_buffer buffer; uint64_t offset; @@ -70,6 +126,7 @@ struct vk_subbuffer { struct vk_pipeline { std::string name; + vk::ShaderModule shader_module; vk::DescriptorSetLayout dsl; std::vector descriptor_pools; std::vector descriptor_sets; @@ -82,16 +139,6 @@ struct vk_pipeline { uint32_t align; }; -struct vk_queue { - uint32_t queue_family_index; - vk::Queue queue; - vk::CommandPool pool; - uint32_t cmd_buffer_idx; - std::vector cmd_buffers; - - vk::PipelineStageFlags stage_flags; -}; - struct vk_semaphore { vk::Semaphore s; uint64_t value; @@ -105,20 +152,6 @@ struct vk_submission { typedef std::vector vk_sequence; -struct vk_device { - vk::PhysicalDevice physical_device; - vk::PhysicalDeviceProperties properties; - uint64_t max_memory_allocation_size; - bool fp16; - vk::Device device; - uint32_t vendor_id; - vk_queue compute_queue; - vk_queue transfer_queue; - uint32_t descriptor_set_mode; - uint32_t subgroup_size; - bool uma; -}; - struct vk_op_push_constants { uint32_t KX; uint32_t KY; @@ -190,13 +223,13 @@ struct ggml_tensor_extra_gpu { size_t ctx_idx; - vk_buffer buffer_gpu; + vk_buffer_ref buffer_gpu; uint64_t offset; void reset() { ready = false; ctx_idx = 0; - buffer_gpu.size = 0; + buffer_gpu.reset(); offset = 0; } }; @@ -210,69 +243,96 @@ struct ggml_vk_garbage_collector { std::vector contexts; }; -typedef void (*ggml_vk_func_t)(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +struct ggml_backend_vk_context { + std::string name; -vk::Instance vk_instance; -vk_device vk_device; -vk_pipeline vk_pipeline_matmul_f32_l, vk_pipeline_matmul_f32_m, vk_pipeline_matmul_f32_s; -vk_pipeline vk_pipeline_matmul_f32_aligned_l, vk_pipeline_matmul_f32_aligned_m, vk_pipeline_matmul_f32_aligned_s; -vk_pipeline vk_pipeline_matmul_f16_l, vk_pipeline_matmul_f16_m, vk_pipeline_matmul_f16_s; 
-vk_pipeline vk_pipeline_matmul_f16_aligned_l, vk_pipeline_matmul_f16_aligned_m, vk_pipeline_matmul_f16_aligned_s; -vk_pipeline vk_pipeline_matmul_f16_f32_l, vk_pipeline_matmul_f16_f32_m, vk_pipeline_matmul_f16_f32_s; -vk_pipeline vk_pipeline_matmul_f16_f32_aligned_l, vk_pipeline_matmul_f16_f32_aligned_m, vk_pipeline_matmul_f16_f32_aligned_s; -vk_pipeline vk_pipeline_matmul_split_k_reduce; -vk_pipeline vk_pipeline_dequant[VK_NUM_TYPES]; -vk_pipeline vk_pipeline_dequant_mul_mat_vec_f32[VK_NUM_TYPES]; -vk_pipeline vk_pipeline_mul_mat_vec_p021_f16_f32; -vk_pipeline vk_pipeline_mul_mat_vec_nc_f16_f32; -vk_pipeline vk_pipeline_get_rows[VK_NUM_TYPES]; -vk_pipeline vk_pipeline_get_rows_f32[VK_NUM_TYPES]; -vk_pipeline vk_pipeline_mul_f32; -vk_pipeline vk_pipeline_add_f32; -vk_pipeline vk_pipeline_scale_f32; -vk_pipeline vk_pipeline_sqr_f32; -vk_pipeline vk_pipeline_clamp_f32; -vk_pipeline vk_pipeline_cpy_f32_f32, vk_pipeline_cpy_f32_f16, vk_pipeline_cpy_f16_f16; -vk_pipeline vk_pipeline_norm_f32; -vk_pipeline vk_pipeline_rms_norm_f32; -vk_pipeline vk_pipeline_gelu_f32; -vk_pipeline vk_pipeline_silu_f32; -vk_pipeline vk_pipeline_relu_f32; -vk_pipeline vk_pipeline_diag_mask_inf_f32; -vk_pipeline vk_pipeline_soft_max_f32; -vk_pipeline vk_pipeline_rope_f32, vk_pipeline_rope_f16; -vk_pipeline vk_pipeline_rope_neox_f32, vk_pipeline_rope_neox_f16; + std::weak_ptr device; + vk_pipeline pipeline_matmul_f32_l, pipeline_matmul_f32_m, pipeline_matmul_f32_s; + vk_pipeline pipeline_matmul_f32_aligned_l, pipeline_matmul_f32_aligned_m, pipeline_matmul_f32_aligned_s; + vk_pipeline pipeline_matmul_f16_l, pipeline_matmul_f16_m, pipeline_matmul_f16_s; + vk_pipeline pipeline_matmul_f16_aligned_l, pipeline_matmul_f16_aligned_m, pipeline_matmul_f16_aligned_s; + vk_pipeline pipeline_matmul_f16_f32_l, pipeline_matmul_f16_f32_m, pipeline_matmul_f16_f32_s; + vk_pipeline pipeline_matmul_f16_f32_aligned_l, pipeline_matmul_f16_f32_aligned_m, pipeline_matmul_f16_f32_aligned_s; + vk_pipeline 
pipeline_matmul_split_k_reduce; + vk_pipeline pipeline_dequant[VK_NUM_TYPES]; + vk_pipeline pipeline_dequant_mul_mat_vec_f32[VK_NUM_TYPES]; + vk_pipeline pipeline_mul_mat_vec_p021_f16_f32; + vk_pipeline pipeline_mul_mat_vec_nc_f16_f32; + vk_pipeline pipeline_get_rows[VK_NUM_TYPES]; + vk_pipeline pipeline_get_rows_f32[VK_NUM_TYPES]; + vk_pipeline pipeline_mul_f32; + vk_pipeline pipeline_add_f32; + vk_pipeline pipeline_scale_f32; + vk_pipeline pipeline_sqr_f32; + vk_pipeline pipeline_clamp_f32; + vk_pipeline pipeline_cpy_f32_f32, pipeline_cpy_f32_f16, pipeline_cpy_f16_f16; + vk_pipeline pipeline_norm_f32; + vk_pipeline pipeline_rms_norm_f32; + vk_pipeline pipeline_gelu_f32; + vk_pipeline pipeline_silu_f32; + vk_pipeline pipeline_relu_f32; + vk_pipeline pipeline_diag_mask_inf_f32; + vk_pipeline pipeline_soft_max_f32; + vk_pipeline pipeline_rope_f32, pipeline_rope_f16; + vk_pipeline pipeline_rope_neox_f32, pipeline_rope_neox_f16; -static size_t vk_semaphore_idx, vk_event_idx; -static ggml_vk_garbage_collector vk_gc; -static std::vector> vk_pinned_memory; -static size_t vk_prealloc_size_qx, vk_prealloc_size_qy, vk_prealloc_size_x, vk_prealloc_size_y, vk_prealloc_size_split_k; -static vk_buffer vk_prealloc_qx, vk_prealloc_qy, vk_prealloc_x, vk_prealloc_y, vk_prealloc_split_k; -static vk::Fence vk_fence; -static vk_buffer vk_staging; -static size_t vk_staging_size; -static size_t vk_staging_offset; -static vk_buffer vk_sync_staging; + size_t semaphore_idx, event_idx; + ggml_vk_garbage_collector gc; + std::vector> pinned_memory; + size_t prealloc_size_qx, prealloc_size_qy, prealloc_size_x, prealloc_size_y, prealloc_size_split_k; + vk_buffer prealloc_qx, prealloc_qy, prealloc_x, prealloc_y, prealloc_split_k; + vk::Fence fence; + vk_buffer staging; + size_t staging_size; + size_t staging_offset; + vk_buffer sync_staging; -static vk_context * vk_ctx; -static vk_context * vk_transfer_ctx; + vk_buffer buffer_pool[MAX_VK_BUFFERS]; -static bool vk_disable; + vk_context * 
compute_ctx; + vk_context * transfer_ctx; + + bool disable; + bool initialized; + + size_t idx; +}; + +struct vk_instance { + vk::Instance instance; + + std::vector device_indices; + + std::shared_ptr devices[GGML_VK_MAX_DEVICES]; + ggml_backend_t backends[GGML_VK_MAX_DEVICES]; + ggml_backend_vk_context contexts[GGML_VK_MAX_DEVICES]; + ggml_backend_buffer_type buffer_types[GGML_VK_MAX_DEVICES]; + bool initialized[GGML_VK_MAX_DEVICES]; +}; #ifdef GGML_VULKAN_CHECK_RESULTS -size_t vk_skip_checks; -size_t vk_output_tensor; +static size_t vk_skip_checks; +static size_t vk_output_tensor; + +static void ggml_vk_print_tensor(ggml_backend * ctx, const ggml_tensor * tensor, const char * name); +static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_params * params, ggml_tensor * tensor); +static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_compute_params * params, ggml_tensor * tensor); #endif -static vk_pipeline ggml_vk_create_pipeline(const std::string& name, size_t spv_size, const void* spv_data, const std::string& entrypoint, uint32_t parameter_count, uint32_t push_constant_size, std::array wg_denoms, std::vector&& specialization_constants, uint32_t align) { +typedef void (*ggml_vk_func_t)(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); + +static bool vk_instance_initialized = false; +static vk_instance vk_instance; + +GGML_CALL static void ggml_backend_vk_free(ggml_backend_t backend); + +static void ggml_vk_create_pipeline(ggml_backend_vk_context * ctx, vk_pipeline& pipeline, const std::string& name, size_t spv_size, const void* spv_data, const std::string& entrypoint, uint32_t parameter_count, uint32_t push_constant_size, std::array wg_denoms, std::vector&& specialization_constants, uint32_t align) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_create_pipeline(" << name << ", " << entrypoint << ", " << parameter_count << ", " << push_constant_size 
<< ", (" << wg_denoms[0] << "," << wg_denoms[1] << "," << wg_denoms[2] << "), specialization_constants, " << align << ")" << std::endl; #endif GGML_ASSERT(parameter_count > 0); GGML_ASSERT(wg_denoms[0] > 0 && wg_denoms[1] > 0 && wg_denoms[2] > 0); // NOLINT - vk_pipeline pipeline; - pipeline.name = name; pipeline.parameter_count = parameter_count; pipeline.push_constant_size = push_constant_size; @@ -280,7 +340,7 @@ static vk_pipeline ggml_vk_create_pipeline(const std::string& name, size_t spv_s pipeline.align = align; vk::ShaderModuleCreateInfo shader_module_create_info({}, spv_size, reinterpret_cast(spv_data)); - vk::ShaderModule shader_module = vk_device.device.createShaderModule(shader_module_create_info); + pipeline.shader_module = ctx->device.lock()->device.createShaderModule(shader_module_create_info); std::vector dsl_binding; std::vector dsl_binding_flags; @@ -301,17 +361,17 @@ static vk_pipeline ggml_vk_create_pipeline(const std::string& name, size_t spv_s {}, dsl_binding); descriptor_set_layout_create_info.setPNext(&dslbfci); - pipeline.dsl = vk_device.device.createDescriptorSetLayout(descriptor_set_layout_create_info); + pipeline.dsl = ctx->device.lock()->device.createDescriptorSetLayout(descriptor_set_layout_create_info); // Check if device supports multiple descriptors per pool - if (vk_device.descriptor_set_mode == VK_DEVICE_DESCRIPTOR_POOL_MODE_UNKNOWN) { + if (ctx->device.lock()->descriptor_set_mode == VK_DEVICE_DESCRIPTOR_POOL_MODE_UNKNOWN) { const uint32_t alloc_count = 2; // Try allocating multiple sets from one pool // This fails on AMD for some reason, so add a fall back to allocating one pool per set vk::DescriptorPoolSize descriptor_pool_size(vk::DescriptorType::eStorageBuffer, pipeline.parameter_count); vk::DescriptorPoolCreateInfo descriptor_pool_create_info({}, alloc_count, descriptor_pool_size); - vk::DescriptorPool pool = vk_device.device.createDescriptorPool(descriptor_pool_create_info); + vk::DescriptorPool pool = 
ctx->device.lock()->device.createDescriptorPool(descriptor_pool_create_info); std::vector layouts(alloc_count); for (uint32_t i = 0; i < alloc_count; i++) { @@ -319,24 +379,24 @@ static vk_pipeline ggml_vk_create_pipeline(const std::string& name, size_t spv_s } try { vk::DescriptorSetAllocateInfo descriptor_set_alloc_info(pool, alloc_count, layouts.data()); - std::vector sets = vk_device.device.allocateDescriptorSets(descriptor_set_alloc_info); + std::vector sets = ctx->device.lock()->device.allocateDescriptorSets(descriptor_set_alloc_info); } catch(vk::OutOfPoolMemoryError const&) { - vk_device.descriptor_set_mode = VK_DEVICE_DESCRIPTOR_POOL_MODE_SINGLE; + ctx->device.lock()->descriptor_set_mode = VK_DEVICE_DESCRIPTOR_POOL_MODE_SINGLE; } - vk_device.device.destroyDescriptorPool(pool); + ctx->device.lock()->device.destroyDescriptorPool(pool); } - if (vk_device.descriptor_set_mode == VK_DEVICE_DESCRIPTOR_POOL_MODE_MULTI) { + if (ctx->device.lock()->descriptor_set_mode == VK_DEVICE_DESCRIPTOR_POOL_MODE_MULTI) { vk::DescriptorPoolSize descriptor_pool_size(vk::DescriptorType::eStorageBuffer, pipeline.parameter_count); vk::DescriptorPoolCreateInfo descriptor_pool_create_info({}, 128, descriptor_pool_size); - pipeline.descriptor_pools.push_back(vk_device.device.createDescriptorPool(descriptor_pool_create_info)); + pipeline.descriptor_pools.push_back(ctx->device.lock()->device.createDescriptorPool(descriptor_pool_create_info)); } pipeline.descriptor_set_idx = 0; vk::PipelineLayoutCreateInfo pipeline_layout_create_info(vk::PipelineLayoutCreateFlags(), pipeline.dsl, pcr); - pipeline.layout = vk_device.device.createPipelineLayout(pipeline_layout_create_info); + pipeline.layout = ctx->device.lock()->device.createPipelineLayout(pipeline_layout_create_info); std::vector specialization_entries(specialization_constants.size()); @@ -356,41 +416,45 @@ static vk_pipeline ggml_vk_create_pipeline(const std::string& name, size_t spv_s vk::PipelineShaderStageCreateInfo 
pipeline_shader_create_info( vk::PipelineShaderStageCreateFlags(), vk::ShaderStageFlagBits::eCompute, - shader_module, + pipeline.shader_module, entrypoint.c_str(), &specialization_info); vk::ComputePipelineCreateInfo compute_pipeline_create_info( vk::PipelineCreateFlags(), pipeline_shader_create_info, pipeline.layout); - pipeline.pipeline = vk_device.device.createComputePipeline(VK_NULL_HANDLE, compute_pipeline_create_info).value; + pipeline.pipeline = ctx->device.lock()->device.createComputePipeline(VK_NULL_HANDLE, compute_pipeline_create_info).value; - return pipeline; + ctx->gc.pipelines.push_back(&pipeline); } -static void ggml_vk_pipeline_allocate_descriptor_sets(vk_pipeline& pipeline, uint32_t n) { +static void ggml_vk_destroy_pipeline(ggml_backend_vk_context * ctx, vk_pipeline * pipeline) { + for (auto& pool : pipeline->descriptor_pools) { + ctx->device.lock()->device.destroyDescriptorPool(pool); + } + pipeline->descriptor_pools.clear(); + pipeline->descriptor_sets.clear(); + pipeline->descriptor_set_idx = 0; + + ctx->device.lock()->device.destroyDescriptorSetLayout(pipeline->dsl); + + ctx->device.lock()->device.destroyPipelineLayout(pipeline->layout); + + ctx->device.lock()->device.destroyShaderModule(pipeline->shader_module); + + ctx->device.lock()->device.destroyPipeline(pipeline->pipeline); +} + +static void ggml_pipeline_allocate_descriptor_sets(ggml_backend_vk_context * ctx, vk_pipeline& pipeline, uint32_t n) { #ifdef GGML_VULKAN_DEBUG - std::cerr << "ggml_vk_pipeline_allocate_descriptor_sets(" << pipeline.name << ", " << n << ")" << std::endl; + std::cerr << "ggml_pipeline_allocate_descriptor_sets(" << pipeline.name << ", " << n << ")" << std::endl; #endif - // Check if gc already contains pipeline before adding it - bool gc_found = false; - for (auto * pl : vk_gc.pipelines) { - if (&pipeline == pl) { - gc_found = true; - break; - } - } - - if (!gc_found) { - vk_gc.pipelines.push_back(&pipeline); - } - if (pipeline.descriptor_sets.size() >= 
pipeline.descriptor_set_idx + n) { // Enough descriptors are available return; } - if (vk_device.descriptor_set_mode == VK_DEVICE_DESCRIPTOR_POOL_MODE_MULTI) { + if (ctx->device.lock()->descriptor_set_mode == VK_DEVICE_DESCRIPTOR_POOL_MODE_MULTI) { const uint32_t alloc_count = pipeline.descriptor_set_idx + n - pipeline.descriptor_sets.size(); std::vector layouts(alloc_count); @@ -398,29 +462,29 @@ static void ggml_vk_pipeline_allocate_descriptor_sets(vk_pipeline& pipeline, uin layouts[i] = pipeline.dsl; } vk::DescriptorSetAllocateInfo descriptor_set_alloc_info(pipeline.descriptor_pools[0], alloc_count, layouts.data()); - std::vector sets = vk_device.device.allocateDescriptorSets(descriptor_set_alloc_info); + std::vector sets = ctx->device.lock()->device.allocateDescriptorSets(descriptor_set_alloc_info); pipeline.descriptor_sets.insert(pipeline.descriptor_sets.end(), sets.begin(), sets.end()); } else { for (uint32_t i = pipeline.descriptor_sets.size(); i < pipeline.descriptor_set_idx + n; i++) { vk::DescriptorPoolSize descriptor_pool_size(vk::DescriptorType::eStorageBuffer, pipeline.parameter_count); vk::DescriptorPoolCreateInfo descriptor_pool_create_info({}, 1, descriptor_pool_size); - pipeline.descriptor_pools.push_back(vk_device.device.createDescriptorPool(descriptor_pool_create_info)); + pipeline.descriptor_pools.push_back(ctx->device.lock()->device.createDescriptorPool(descriptor_pool_create_info)); vk::DescriptorSetAllocateInfo descriptor_set_alloc_info(pipeline.descriptor_pools[i], 1, &pipeline.dsl); - std::vector sets = vk_device.device.allocateDescriptorSets(descriptor_set_alloc_info); + std::vector sets = ctx->device.lock()->device.allocateDescriptorSets(descriptor_set_alloc_info); pipeline.descriptor_sets.push_back(sets[0]); } } } -static void ggml_vk_pipeline_cleanup(vk_pipeline& pipeline) { +static void ggml_pipeline_cleanup(vk_pipeline& pipeline) { #ifdef GGML_VULKAN_DEBUG - std::cerr << "ggml_vk_pipeline_cleanup(" << pipeline.name << ")" << 
std::endl; + std::cerr << "ggml_pipeline_cleanup(" << pipeline.name << ")" << std::endl; #endif pipeline.descriptor_set_idx = 0; } -static vk::CommandBuffer ggml_vk_create_cmd_buffer(vk_queue& q) { +static vk::CommandBuffer ggml_vk_create_cmd_buffer(ggml_backend_vk_context * ctx, vk_queue& q) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_create_cmd_buffer()" << std::endl; #endif @@ -433,7 +497,7 @@ static vk::CommandBuffer ggml_vk_create_cmd_buffer(vk_queue& q) { q.pool, vk::CommandBufferLevel::ePrimary, 1); - const std::vector cmd_buffers = vk_device.device.allocateCommandBuffers(command_buffer_alloc_info); + const std::vector cmd_buffers = ctx->device.lock()->device.allocateCommandBuffers(command_buffer_alloc_info); auto buf = cmd_buffers.front(); q.cmd_buffers.push_back(buf); @@ -442,24 +506,17 @@ static vk::CommandBuffer ggml_vk_create_cmd_buffer(vk_queue& q) { return buf; } -static vk_submission ggml_vk_create_submission(vk_queue& q, std::vector wait_semaphores, std::vector signal_semaphores) { +static vk_submission ggml_vk_create_submission(ggml_backend_vk_context * ctx, vk_queue& q, std::vector wait_semaphores, std::vector signal_semaphores) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_create_submission()" << std::endl; #endif vk_submission s; - s.buffer = ggml_vk_create_cmd_buffer(q); + s.buffer = ggml_vk_create_cmd_buffer(ctx, q); s.wait_semaphores = std::move(wait_semaphores); s.signal_semaphores = std::move(signal_semaphores); return s; } -static vk_sequence ggml_vk_create_sequence_1(vk_queue& q, std::vector wait_semaphores, std::vector signal_semaphores) { -#ifdef GGML_VULKAN_DEBUG - std::cerr << "ggml_vk_create_sequence_1()" << std::endl; -#endif - return { ggml_vk_create_submission(q, std::move(wait_semaphores), std::move(signal_semaphores)) }; -} - static void ggml_vk_submit(vk_context * ctx, vk::Fence fence) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_submit(" << ctx->seqs.size() << ", " << fence << ")" << std::endl; @@ -578,89 +635,89 
@@ static uint32_t ggml_vk_find_queue_family_index(std::vectordevice.lock()->device.createCommandPool(command_pool_create_info_compute); q.cmd_buffer_idx = 0; - q.queue = vk_device.device.getQueue(queue_family_index, queue_index); + q.queue = ctx->device.lock()->device.getQueue(queue_family_index, queue_index); q.stage_flags = stage_flags; - - return q; } -static vk_context * ggml_vk_create_context(vk_queue& q) { +static vk_context * ggml_vk_create_context(ggml_backend_vk_context * ctx, vk_queue& q) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_create_context()" << std::endl; #endif - vk_gc.contexts.emplace_back(); - vk_context * result = &vk_gc.contexts[vk_gc.contexts.size() - 1]; + ctx->gc.contexts.emplace_back(); + vk_context * result = &ctx->gc.contexts[ctx->gc.contexts.size() - 1]; memset((void *) result, 0, sizeof(vk_context)); - result->idx = vk_gc.contexts.size() - 1; + result->idx = ctx->gc.contexts.size() - 1; result->q = &q; return result; } -static vk_semaphore * ggml_vk_create_binary_semaphore() { +static vk_semaphore * ggml_vk_create_binary_semaphore(ggml_backend_vk_context * ctx) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_create_timeline_semaphore()" << std::endl; #endif vk::SemaphoreTypeCreateInfo tci{ vk::SemaphoreType::eBinary, 0 }; vk::SemaphoreCreateInfo ci{}; ci.setPNext(&tci); - vk::Semaphore semaphore = vk_device.device.createSemaphore(ci); - vk_gc.semaphores.push_back({ semaphore, 0 }); - return &vk_gc.semaphores[vk_gc.semaphores.size() - 1]; + vk::Semaphore semaphore = ctx->device.lock()->device.createSemaphore(ci); + ctx->gc.semaphores.push_back({ semaphore, 0 }); + return &ctx->gc.semaphores[ctx->gc.semaphores.size() - 1]; } -static vk_semaphore * ggml_vk_create_timeline_semaphore() { +static vk_semaphore * ggml_vk_create_timeline_semaphore(ggml_backend_vk_context * ctx) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_create_timeline_semaphore()" << std::endl; #endif - if (vk_semaphore_idx >= vk_gc.tl_semaphores.size()) { + if 
(ctx->semaphore_idx >= ctx->gc.tl_semaphores.size()) { vk::SemaphoreTypeCreateInfo tci{ vk::SemaphoreType::eTimeline, 0 }; vk::SemaphoreCreateInfo ci{}; ci.setPNext(&tci); - vk::Semaphore semaphore = vk_device.device.createSemaphore(ci); - vk_gc.tl_semaphores.push_back({ semaphore, 0 }); + vk::Semaphore semaphore = ctx->device.lock()->device.createSemaphore(ci); + ctx->gc.tl_semaphores.push_back({ semaphore, 0 }); } - return &vk_gc.tl_semaphores[vk_semaphore_idx++]; + return &ctx->gc.tl_semaphores[ctx->semaphore_idx++]; } -static vk::Event ggml_vk_create_event() { - if (vk_event_idx >= vk_gc.events.size()) { - vk_gc.events.push_back(vk_device.device.createEvent({})); +static vk::Event ggml_vk_create_event(ggml_backend_vk_context * ctx) { + if (ctx->event_idx >= ctx->gc.events.size()) { + ctx->gc.events.push_back(ctx->device.lock()->device.createEvent({})); } - return vk_gc.events[vk_event_idx++]; + return ctx->gc.events[ctx->event_idx++]; } -static void ggml_vk_queue_cleanup(vk_queue& q) { +static void ggml_vk_queue_cleanup(ggml_backend_vk_context * ctx, vk_queue& q) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_queue_cleanup()" << std::endl; #endif // Requires command buffers to be done - vk_device.device.resetCommandPool(q.pool); + ctx->device.lock()->device.resetCommandPool(q.pool); q.cmd_buffer_idx = 0; } -static vk_buffer ggml_vk_create_buffer(size_t size, vk::MemoryPropertyFlags req_flags) { +static vk_buffer ggml_vk_create_buffer(ggml_backend_vk_context * ctx, size_t size, vk::MemoryPropertyFlags req_flags) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_create_buffer(" << size << ", " << to_string(req_flags) << ")" << std::endl; #endif - GGML_ASSERT(size > 0); + vk_buffer buf = std::make_shared(); - vk_buffer buf; + if (size == 0) { + buf->size = 0; + return buf; + } - buf.size = size; + buf->size = size; vk::BufferCreateInfo buffer_create_info{ vk::BufferCreateFlags(), size, @@ -670,11 +727,11 @@ static vk_buffer ggml_vk_create_buffer(size_t size, 
vk::MemoryPropertyFlags req_ nullptr, }; - buf.buffer = vk_device.device.createBuffer(buffer_create_info); + buf->buffer = ctx->device.lock()->device.createBuffer(buffer_create_info); - vk::MemoryRequirements mem_req = vk_device.device.getBufferMemoryRequirements(buf.buffer); + vk::MemoryRequirements mem_req = ctx->device.lock()->device.getBufferMemoryRequirements(buf->buffer); - vk::PhysicalDeviceMemoryProperties mem_props = vk_device.physical_device.getMemoryProperties(); + vk::PhysicalDeviceMemoryProperties mem_props = ctx->device.lock()->physical_device.getMemoryProperties(); uint32_t memory_type_index = UINT32_MAX; @@ -691,30 +748,36 @@ static vk_buffer ggml_vk_create_buffer(size_t size, vk::MemoryPropertyFlags req_ } try { - buf.device_memory = vk_device.device.allocateMemory({ mem_req.size, memory_type_index }); + buf->device_memory = ctx->device.lock()->device.allocateMemory({ mem_req.size, memory_type_index }); } catch (const vk::SystemError& e) { // Out of Host/Device memory, clean up buffer - vk_device.device.destroyBuffer(buf.buffer); - buf.size = 0; + ctx->device.lock()->device.destroyBuffer(buf->buffer); + buf->size = 0; throw e; } - buf.memory_property_flags = req_flags; - buf.ptr = nullptr; + buf->memory_property_flags = req_flags; + buf->ptr = nullptr; if (req_flags & vk::MemoryPropertyFlagBits::eHostVisible) { - buf.ptr = vk_device.device.mapMemory(buf.device_memory, 0, VK_WHOLE_SIZE); + buf->ptr = ctx->device.lock()->device.mapMemory(buf->device_memory, 0, VK_WHOLE_SIZE); } - vk_device.device.bindBufferMemory(buf.buffer, buf.device_memory, 0); + ctx->device.lock()->device.bindBufferMemory(buf->buffer, buf->device_memory, 0); - buf.qf_owner = VK_QUEUE_FAMILY_IGNORED; + buf->ctx = ctx; + + buf->device = ctx->device.lock(); + +#ifdef GGML_VULKAN_DEBUG + std::cerr << "Created buffer " << buf->buffer << std::endl; +#endif return buf; } -static vk_buffer ggml_vk_create_buffer_check(size_t size, vk::MemoryPropertyFlags req_flags) { +static vk_buffer 
ggml_vk_create_buffer_check(ggml_backend_vk_context * ctx, size_t size, vk::MemoryPropertyFlags req_flags) { try { - return ggml_vk_create_buffer(size, req_flags); + return ggml_vk_create_buffer(ctx, size, req_flags); } catch (const vk::SystemError& e) { std::cerr << "ggml_vulkan: Memory allocation of size " << size << " failed." << std::endl; std::cerr << "ggml_vulkan: " << e.what() << std::endl; @@ -722,14 +785,14 @@ static vk_buffer ggml_vk_create_buffer_check(size_t size, vk::MemoryPropertyFlag } } -static vk_buffer ggml_vk_create_buffer_device(size_t size) { +static vk_buffer ggml_vk_create_buffer_device(ggml_backend_vk_context * ctx, size_t size) { vk_buffer buf; try { - buf = ggml_vk_create_buffer(size, vk::MemoryPropertyFlagBits::eDeviceLocal); + buf = ggml_vk_create_buffer(ctx, size, vk::MemoryPropertyFlagBits::eDeviceLocal); } catch (const vk::SystemError& e) { - if (vk_device.uma) { + if (ctx->device.lock()->uma) { // Fall back to host memory type - buf = ggml_vk_create_buffer_check(size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent); + buf = ggml_vk_create_buffer_check(ctx, size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent); } else { std::cerr << "ggml_vulkan: Device memory allocation of size " << size << " failed." 
<< std::endl; std::cerr << "ggml_vulkan: " << e.what() << std::endl; @@ -741,16 +804,7 @@ static vk_buffer ggml_vk_create_buffer_device(size_t size) { } static void ggml_vk_destroy_buffer(vk_buffer& buf) { - if (buf.size == 0) { - return; - } -#ifdef GGML_VULKAN_DEBUG - std::cerr << "ggml_vk_destroy_buffer(" << buf.size << ")" << std::endl; -#endif - - buf.size = 0; - vk_device.device.freeMemory(buf.device_memory); - vk_device.device.destroyBuffer(buf.buffer); + buf.reset(); } static vk_subbuffer ggml_vk_subbuffer(vk_buffer& buf) { @@ -773,7 +827,7 @@ static void ggml_vk_sync_buffers(vk_context * ctx) { ); } -static void ggml_vk_wait_events(vk::CommandBuffer& cmd_buffer, std::vector&& events, vk::PipelineStageFlags src_stages, vk::PipelineStageFlags dst_stages) { +static void ggml_vk_wait_events(vk_context * ctx, std::vector&& events) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_wait_events()" << std::endl; #endif @@ -781,10 +835,10 @@ static void ggml_vk_wait_events(vk::CommandBuffer& cmd_buffer, std::vectors->buffer.waitEvents( events, - src_stages, - dst_stages, + ctx->q->stage_flags, + ctx->q->stage_flags, {}, {}, {} @@ -810,15 +864,15 @@ static bool ggml_vk_build_shader(ggml_type type) { } } -static void ggml_vk_load_shaders() { +static void ggml_vk_load_shaders(ggml_backend_vk_context * ctx) { #ifdef GGML_VULKAN_DEBUG - std::cerr << "ggml_vk_load_shaders()" << std::endl; + std::cerr << "ggml_vk_load_shaders(" << ctx->name << ")" << std::endl; #endif // mulmat - std::initializer_list warptile_l = { 128, 128, 128, 16, vk_device.subgroup_size * 2, 64, 2, 4, 4, vk_device.subgroup_size }; - std::initializer_list warptile_m = { 128, 64, 64, 16, vk_device.subgroup_size, 32, 2, 4, 2, vk_device.subgroup_size }; - std::initializer_list warptile_s = { vk_device.subgroup_size, 32, 32, 16, 32, 32, 2, 2, 2, vk_device.subgroup_size }; + std::initializer_list warptile_l = { 128, 128, 128, 16, ctx->device.lock()->subgroup_size * 2, 64, 2, 4, 4, 
ctx->device.lock()->subgroup_size }; + std::initializer_list warptile_m = { 128, 64, 64, 16, ctx->device.lock()->subgroup_size, 32, 2, 4, 2, ctx->device.lock()->subgroup_size }; + std::initializer_list warptile_s = { ctx->device.lock()->subgroup_size, 32, 32, 16, 32, 32, 2, 2, 2, ctx->device.lock()->subgroup_size }; std::array l_wg_denoms = {128, 128, 1 }; std::array m_wg_denoms = { 64, 64, 1 }; @@ -828,145 +882,208 @@ static void ggml_vk_load_shaders() { uint32_t m_align = 64; uint32_t s_align = 32; - if (vk_device.fp16) { - vk_pipeline_matmul_f32_l = ggml_vk_create_pipeline("matmul_f32_l", matmul_f32_l_len, matmul_f32_l_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, 1); - vk_pipeline_matmul_f32_m = ggml_vk_create_pipeline("matmul_f32_m", matmul_f32_m_len, matmul_f32_m_data, "main", 3, 14 * sizeof(uint32_t), m_wg_denoms, warptile_m, 1); - vk_pipeline_matmul_f32_s = ggml_vk_create_pipeline("matmul_f32_s", matmul_f32_s_len, matmul_f32_s_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, 1); - vk_pipeline_matmul_f32_aligned_l = ggml_vk_create_pipeline("matmul_f32_aligned_l", matmul_f32_aligned_l_len, matmul_f32_aligned_l_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, l_align); - vk_pipeline_matmul_f32_aligned_m = ggml_vk_create_pipeline("matmul_f32_aligned_m", matmul_f32_aligned_m_len, matmul_f32_aligned_m_data, "main", 3, 14 * sizeof(uint32_t), m_wg_denoms, warptile_m, m_align); - vk_pipeline_matmul_f32_aligned_s = ggml_vk_create_pipeline("matmul_f32_aligned_s", matmul_f32_aligned_s_len, matmul_f32_aligned_s_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, s_align); + if (ctx->device.lock()->fp16) { + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f32_l, "matmul_f32_l", matmul_f32_l_len, matmul_f32_l_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f32_m, "matmul_f32_m", matmul_f32_m_len, matmul_f32_m_data, "main", 3, 14 * 
sizeof(uint32_t), m_wg_denoms, warptile_m, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f32_s, "matmul_f32_s", matmul_f32_s_len, matmul_f32_s_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f32_aligned_l, "matmul_f32_aligned_l", matmul_f32_aligned_l_len, matmul_f32_aligned_l_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, l_align); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f32_aligned_m, "matmul_f32_aligned_m", matmul_f32_aligned_m_len, matmul_f32_aligned_m_data, "main", 3, 14 * sizeof(uint32_t), m_wg_denoms, warptile_m, m_align); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f32_aligned_s, "matmul_f32_aligned_s", matmul_f32_aligned_s_len, matmul_f32_aligned_s_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, s_align); - vk_pipeline_matmul_f16_l = ggml_vk_create_pipeline("matmul_f16_l", matmul_f16_l_len, matmul_f16_l_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, 1); - vk_pipeline_matmul_f16_m = ggml_vk_create_pipeline("matmul_f16_m", matmul_f16_m_len, matmul_f16_m_data, "main", 3, 14 * sizeof(uint32_t), m_wg_denoms, warptile_m, 1); - vk_pipeline_matmul_f16_s = ggml_vk_create_pipeline("matmul_f16_s", matmul_f16_s_len, matmul_f16_s_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f16_l, "matmul_f16_l", matmul_f16_l_len, matmul_f16_l_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f16_m, "matmul_f16_m", matmul_f16_m_len, matmul_f16_m_data, "main", 3, 14 * sizeof(uint32_t), m_wg_denoms, warptile_m, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f16_s, "matmul_f16_s", matmul_f16_s_len, matmul_f16_s_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f16_aligned_l, "matmul_f16_aligned_l", 
matmul_f16_aligned_l_len, matmul_f16_aligned_l_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, l_align); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f16_aligned_m, "matmul_f16_aligned_m", matmul_f16_aligned_m_len, matmul_f16_aligned_m_data, "main", 3, 14 * sizeof(uint32_t), m_wg_denoms, warptile_m, m_align); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f16_aligned_s, "matmul_f16_aligned_s", matmul_f16_aligned_s_len, matmul_f16_aligned_s_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, s_align); - vk_pipeline_matmul_f16_aligned_l = ggml_vk_create_pipeline("matmul_f16_aligned_l", matmul_f16_aligned_l_len, matmul_f16_aligned_l_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, l_align); - vk_pipeline_matmul_f16_aligned_m = ggml_vk_create_pipeline("matmul_f16_aligned_m", matmul_f16_aligned_m_len, matmul_f16_aligned_m_data, "main", 3, 14 * sizeof(uint32_t), m_wg_denoms, warptile_m, m_align); - vk_pipeline_matmul_f16_aligned_s = ggml_vk_create_pipeline("matmul_f16_aligned_s", matmul_f16_aligned_s_len, matmul_f16_aligned_s_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, s_align); - - vk_pipeline_matmul_f16_f32_l = ggml_vk_create_pipeline("matmul_f16_f32_l", matmul_f16_f32_l_len, matmul_f16_f32_l_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, 1); - vk_pipeline_matmul_f16_f32_m = ggml_vk_create_pipeline("matmul_f16_f32_m", matmul_f16_f32_m_len, matmul_f16_f32_m_data, "main", 3, 14 * sizeof(uint32_t), m_wg_denoms, warptile_m, 1); - vk_pipeline_matmul_f16_f32_s = ggml_vk_create_pipeline("matmul_f16_f32_s", matmul_f16_f32_s_len, matmul_f16_f32_s_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, 1); - vk_pipeline_matmul_f16_f32_aligned_l = ggml_vk_create_pipeline("matmul_f16_f32_aligned_l", matmul_f16_f32_aligned_l_len, matmul_f16_f32_aligned_l_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, l_align); - vk_pipeline_matmul_f16_f32_aligned_m = 
ggml_vk_create_pipeline("matmul_f16_f32_aligned_m", matmul_f16_f32_aligned_m_len, matmul_f16_f32_aligned_m_data, "main", 3, 14 * sizeof(uint32_t), m_wg_denoms, warptile_m, m_align); - vk_pipeline_matmul_f16_f32_aligned_s = ggml_vk_create_pipeline("matmul_f16_f32_aligned_s", matmul_f16_f32_aligned_s_len, matmul_f16_f32_aligned_s_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, s_align); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f16_f32_l, "matmul_f16_f32_l", matmul_f16_f32_l_len, matmul_f16_f32_l_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f16_f32_m, "matmul_f16_f32_m", matmul_f16_f32_m_len, matmul_f16_f32_m_data, "main", 3, 14 * sizeof(uint32_t), m_wg_denoms, warptile_m, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f16_f32_s, "matmul_f16_f32_s", matmul_f16_f32_s_len, matmul_f16_f32_s_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f16_f32_aligned_l, "matmul_f16_f32_aligned_l", matmul_f16_f32_aligned_l_len, matmul_f16_f32_aligned_l_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, l_align); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f16_f32_aligned_m, "matmul_f16_f32_aligned_m", matmul_f16_f32_aligned_m_len, matmul_f16_f32_aligned_m_data, "main", 3, 14 * sizeof(uint32_t), m_wg_denoms, warptile_m, m_align); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f16_f32_aligned_s, "matmul_f16_f32_aligned_s", matmul_f16_f32_aligned_s_len, matmul_f16_f32_aligned_s_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, s_align); } else { - vk_pipeline_matmul_f32_l = ggml_vk_create_pipeline("matmul_f32_l", matmul_f32_l_fp32_len, matmul_f32_l_fp32_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, 1); - vk_pipeline_matmul_f32_m = ggml_vk_create_pipeline("matmul_f32_m", matmul_f32_m_fp32_len, matmul_f32_m_fp32_data, "main", 3, 14 * 
sizeof(uint32_t), m_wg_denoms, warptile_m, 1); - vk_pipeline_matmul_f32_s = ggml_vk_create_pipeline("matmul_f32_s", matmul_f32_s_fp32_len, matmul_f32_s_fp32_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, 1); - vk_pipeline_matmul_f32_aligned_l = ggml_vk_create_pipeline("matmul_f32_aligned_l", matmul_f32_aligned_l_fp32_len, matmul_f32_aligned_l_fp32_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, l_align); - vk_pipeline_matmul_f32_aligned_m = ggml_vk_create_pipeline("matmul_f32_aligned_m", matmul_f32_aligned_m_fp32_len, matmul_f32_aligned_m_fp32_data, "main", 3, 14 * sizeof(uint32_t), m_wg_denoms, warptile_m, m_align); - vk_pipeline_matmul_f32_aligned_s = ggml_vk_create_pipeline("matmul_f32_aligned_s", matmul_f32_aligned_s_fp32_len, matmul_f32_aligned_s_fp32_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, s_align); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f32_l, "matmul_f32_l", matmul_f32_l_fp32_len, matmul_f32_l_fp32_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f32_m, "matmul_f32_m", matmul_f32_m_fp32_len, matmul_f32_m_fp32_data, "main", 3, 14 * sizeof(uint32_t), m_wg_denoms, warptile_m, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f32_s, "matmul_f32_s", matmul_f32_s_fp32_len, matmul_f32_s_fp32_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f32_aligned_l, "matmul_f32_aligned_l", matmul_f32_aligned_l_fp32_len, matmul_f32_aligned_l_fp32_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, l_align); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f32_aligned_m, "matmul_f32_aligned_m", matmul_f32_aligned_m_fp32_len, matmul_f32_aligned_m_fp32_data, "main", 3, 14 * sizeof(uint32_t), m_wg_denoms, warptile_m, m_align); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f32_aligned_s, "matmul_f32_aligned_s", matmul_f32_aligned_s_fp32_len, 
matmul_f32_aligned_s_fp32_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, s_align); - vk_pipeline_matmul_f16_l = ggml_vk_create_pipeline("matmul_f16_l", matmul_f16_l_fp32_len, matmul_f16_l_fp32_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, 1); - vk_pipeline_matmul_f16_m = ggml_vk_create_pipeline("matmul_f16_m", matmul_f16_m_fp32_len, matmul_f16_m_fp32_data, "main", 3, 14 * sizeof(uint32_t), m_wg_denoms, warptile_m, 1); - vk_pipeline_matmul_f16_s = ggml_vk_create_pipeline("matmul_f16_s", matmul_f16_s_fp32_len, matmul_f16_s_fp32_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f16_l, "matmul_f16_l", matmul_f16_l_fp32_len, matmul_f16_l_fp32_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f16_m, "matmul_f16_m", matmul_f16_m_fp32_len, matmul_f16_m_fp32_data, "main", 3, 14 * sizeof(uint32_t), m_wg_denoms, warptile_m, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f16_s, "matmul_f16_s", matmul_f16_s_fp32_len, matmul_f16_s_fp32_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f16_aligned_l, "matmul_f16_aligned_l", matmul_f16_aligned_l_fp32_len, matmul_f16_aligned_l_fp32_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, l_align); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f16_aligned_m, "matmul_f16_aligned_m", matmul_f16_aligned_m_fp32_len, matmul_f16_aligned_m_fp32_data, "main", 3, 14 * sizeof(uint32_t), m_wg_denoms, warptile_m, m_align); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f16_aligned_s, "matmul_f16_aligned_s", matmul_f16_aligned_s_fp32_len, matmul_f16_aligned_s_fp32_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, s_align); - vk_pipeline_matmul_f16_aligned_l = ggml_vk_create_pipeline("matmul_f16_aligned_l", matmul_f16_aligned_l_fp32_len, 
matmul_f16_aligned_l_fp32_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, l_align); - vk_pipeline_matmul_f16_aligned_m = ggml_vk_create_pipeline("matmul_f16_aligned_m", matmul_f16_aligned_m_fp32_len, matmul_f16_aligned_m_fp32_data, "main", 3, 14 * sizeof(uint32_t), m_wg_denoms, warptile_m, m_align); - vk_pipeline_matmul_f16_aligned_s = ggml_vk_create_pipeline("matmul_f16_aligned_s", matmul_f16_aligned_s_fp32_len, matmul_f16_aligned_s_fp32_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, s_align); - - vk_pipeline_matmul_f16_f32_l = ggml_vk_create_pipeline("matmul_f16_f32_l", matmul_f16_f32_l_fp32_len, matmul_f16_f32_l_fp32_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, 1); - vk_pipeline_matmul_f16_f32_m = ggml_vk_create_pipeline("matmul_f16_f32_m", matmul_f16_f32_m_fp32_len, matmul_f16_f32_m_fp32_data, "main", 3, 14 * sizeof(uint32_t), m_wg_denoms, warptile_m, 1); - vk_pipeline_matmul_f16_f32_s = ggml_vk_create_pipeline("matmul_f16_f32_s", matmul_f16_f32_s_fp32_len, matmul_f16_f32_s_fp32_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, 1); - vk_pipeline_matmul_f16_f32_aligned_l = ggml_vk_create_pipeline("matmul_f16_f32_aligned_l", matmul_f16_f32_aligned_l_fp32_len, matmul_f16_f32_aligned_l_fp32_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, l_align); - vk_pipeline_matmul_f16_f32_aligned_m = ggml_vk_create_pipeline("matmul_f16_f32_aligned_m", matmul_f16_f32_aligned_m_fp32_len, matmul_f16_f32_aligned_m_fp32_data, "main", 3, 14 * sizeof(uint32_t), m_wg_denoms, warptile_m, m_align); - vk_pipeline_matmul_f16_f32_aligned_s = ggml_vk_create_pipeline("matmul_f16_f32_aligned_s", matmul_f16_f32_aligned_s_fp32_len, matmul_f16_f32_aligned_s_fp32_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, s_align); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f16_f32_l, "matmul_f16_f32_l", matmul_f16_f32_l_fp32_len, matmul_f16_f32_l_fp32_data, "main", 3, 14 * sizeof(uint32_t), 
l_wg_denoms, warptile_l, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f16_f32_m, "matmul_f16_f32_m", matmul_f16_f32_m_fp32_len, matmul_f16_f32_m_fp32_data, "main", 3, 14 * sizeof(uint32_t), m_wg_denoms, warptile_m, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f16_f32_s, "matmul_f16_f32_s", matmul_f16_f32_s_fp32_len, matmul_f16_f32_s_fp32_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f16_f32_aligned_l, "matmul_f16_f32_aligned_l", matmul_f16_f32_aligned_l_fp32_len, matmul_f16_f32_aligned_l_fp32_data, "main", 3, 14 * sizeof(uint32_t), l_wg_denoms, warptile_l, l_align); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f16_f32_aligned_m, "matmul_f16_f32_aligned_m", matmul_f16_f32_aligned_m_fp32_len, matmul_f16_f32_aligned_m_fp32_data, "main", 3, 14 * sizeof(uint32_t), m_wg_denoms, warptile_m, m_align); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_f16_f32_aligned_s, "matmul_f16_f32_aligned_s", matmul_f16_f32_aligned_s_fp32_len, matmul_f16_f32_aligned_s_fp32_data, "main", 3, 14 * sizeof(uint32_t), s_wg_denoms, warptile_s, s_align); } - vk_pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_F16] = ggml_vk_create_pipeline("mul_mat_vec_f16_f32", mul_mat_vec_f16_f32_len, mul_mat_vec_f16_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1); - vk_pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_Q4_0] = ggml_vk_create_pipeline("mul_mat_vec_q4_0_f32", mul_mat_vec_q4_0_f32_len, mul_mat_vec_q4_0_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1); - vk_pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_Q4_1] = ggml_vk_create_pipeline("mul_mat_vec_q4_1_f32", mul_mat_vec_q4_1_f32_len, mul_mat_vec_q4_1_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1); - vk_pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_Q5_0] = ggml_vk_create_pipeline("mul_mat_vec_q5_0_f32", mul_mat_vec_q5_0_f32_len, mul_mat_vec_q5_0_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1); - 
vk_pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_Q5_1] = ggml_vk_create_pipeline("mul_mat_vec_q5_1_f32", mul_mat_vec_q5_1_f32_len, mul_mat_vec_q5_1_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1); - vk_pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_Q8_0] = ggml_vk_create_pipeline("mul_mat_vec_q8_0_f32", mul_mat_vec_q8_0_f32_len, mul_mat_vec_q8_0_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1); - vk_pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_Q2_K] = ggml_vk_create_pipeline("mul_mat_vec_q2_K_f32", mul_mat_vec_q2_K_f32_len, mul_mat_vec_q2_K_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1); - vk_pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_Q3_K] = ggml_vk_create_pipeline("mul_mat_vec_q3_K_f32", mul_mat_vec_q3_K_f32_len, mul_mat_vec_q3_K_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1); - vk_pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_Q4_K] = ggml_vk_create_pipeline("mul_mat_vec_q4_K_f32", mul_mat_vec_q4_K_f32_len, mul_mat_vec_q4_K_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1); - vk_pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_Q5_K] = ggml_vk_create_pipeline("mul_mat_vec_q5_K_f32", mul_mat_vec_q5_K_f32_len, mul_mat_vec_q5_K_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1); - vk_pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_Q6_K] = ggml_vk_create_pipeline("mul_mat_vec_q6_K_f32", mul_mat_vec_q6_K_f32_len, mul_mat_vec_q6_K_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_F16 ], "mul_mat_vec_f16_f32", mul_mat_vec_f16_f32_len, mul_mat_vec_f16_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_Q4_0], "mul_mat_vec_q4_0_f32", mul_mat_vec_q4_0_f32_len, mul_mat_vec_q4_0_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_Q4_1], "mul_mat_vec_q4_1_f32", mul_mat_vec_q4_1_f32_len, 
mul_mat_vec_q4_1_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_Q5_0], "mul_mat_vec_q5_0_f32", mul_mat_vec_q5_0_f32_len, mul_mat_vec_q5_0_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_Q5_1], "mul_mat_vec_q5_1_f32", mul_mat_vec_q5_1_f32_len, mul_mat_vec_q5_1_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_Q8_0], "mul_mat_vec_q8_0_f32", mul_mat_vec_q8_0_f32_len, mul_mat_vec_q8_0_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_Q2_K], "mul_mat_vec_q2_K_f32", mul_mat_vec_q2_K_f32_len, mul_mat_vec_q2_K_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_Q3_K], "mul_mat_vec_q3_K_f32", mul_mat_vec_q3_K_f32_len, mul_mat_vec_q3_K_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_Q4_K], "mul_mat_vec_q4_K_f32", mul_mat_vec_q4_K_f32_len, mul_mat_vec_q4_K_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_Q5_K], "mul_mat_vec_q5_K_f32", mul_mat_vec_q5_K_f32_len, mul_mat_vec_q5_K_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_dequant_mul_mat_vec_f32[GGML_TYPE_Q6_K], "mul_mat_vec_q6_K_f32", mul_mat_vec_q6_K_f32_len, mul_mat_vec_q6_K_f32_data, "main", 3, 3 * sizeof(int), {1, 1, 1}, {}, 1); // dequant shaders - vk_pipeline_dequant[GGML_TYPE_F32] = ggml_vk_create_pipeline("f32_to_f16", f32_to_f16_len, f32_to_f16_data, "main", 2, 4 * sizeof(int), {64, 1, 1}, {}, 1); - - vk_pipeline_dequant[GGML_TYPE_F16] = 
ggml_vk_create_pipeline("dequant_f16", dequant_f16_len, dequant_f16_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q4_0] = ggml_vk_create_pipeline("dequant_q4_0", dequant_q4_0_len, dequant_q4_0_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q4_1] = ggml_vk_create_pipeline("dequant_q4_1", dequant_q4_1_len, dequant_q4_1_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q5_0] = ggml_vk_create_pipeline("dequant_q5_0", dequant_q5_0_len, dequant_q5_0_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q5_1] = ggml_vk_create_pipeline("dequant_q5_1", dequant_q5_1_len, dequant_q5_1_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q8_0] = ggml_vk_create_pipeline("dequant_q8_0", dequant_q8_0_len, dequant_q8_0_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q2_K] = ggml_vk_create_pipeline("dequant_q2_K", dequant_q2_K_len, dequant_q2_K_data, "main", 2, 4 * sizeof(int), {256 * 64, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q3_K] = ggml_vk_create_pipeline("dequant_q3_K", dequant_q3_K_len, dequant_q3_K_data, "main", 2, 4 * sizeof(int), {256 * 64, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q4_K] = ggml_vk_create_pipeline("dequant_q4_K", dequant_q4_K_len, dequant_q4_K_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q5_K] = ggml_vk_create_pipeline("dequant_q5_K", dequant_q5_K_len, dequant_q5_K_data, "main", 2, 4 * sizeof(int), {256 * 64, 1, 1}, {}, 1); - vk_pipeline_dequant[GGML_TYPE_Q6_K] = ggml_vk_create_pipeline("dequant_q6_K", dequant_q6_K_len, dequant_q6_K_data, "main", 2, 4 * sizeof(int), {256 * 64, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_dequant[GGML_TYPE_F32 ], "f32_to_f16", f32_to_f16_len, f32_to_f16_data, "main", 2, 4 * sizeof(int), { 64, 1, 1}, {}, 
1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_dequant[GGML_TYPE_F16 ], "dequant_f16", dequant_f16_len, dequant_f16_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_dequant[GGML_TYPE_Q4_0], "dequant_q4_0", dequant_q4_0_len, dequant_q4_0_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_dequant[GGML_TYPE_Q4_1], "dequant_q4_1", dequant_q4_1_len, dequant_q4_1_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_dequant[GGML_TYPE_Q5_0], "dequant_q5_0", dequant_q5_0_len, dequant_q5_0_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_dequant[GGML_TYPE_Q5_1], "dequant_q5_1", dequant_q5_1_len, dequant_q5_1_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_dequant[GGML_TYPE_Q8_0], "dequant_q8_0", dequant_q8_0_len, dequant_q8_0_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_dequant[GGML_TYPE_Q2_K], "dequant_q2_K", dequant_q2_K_len, dequant_q2_K_data, "main", 2, 4 * sizeof(int), {256 * 64, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_dequant[GGML_TYPE_Q3_K], "dequant_q3_K", dequant_q3_K_len, dequant_q3_K_data, "main", 2, 4 * sizeof(int), {256 * 64, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_dequant[GGML_TYPE_Q4_K], "dequant_q4_K", dequant_q4_K_len, dequant_q4_K_data, "main", 2, 4 * sizeof(int), {256 * 32, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_dequant[GGML_TYPE_Q5_K], "dequant_q5_K", dequant_q5_K_len, dequant_q5_K_data, "main", 2, 4 * sizeof(int), {256 * 64, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_dequant[GGML_TYPE_Q6_K], "dequant_q6_K", dequant_q6_K_len, dequant_q6_K_data, "main", 2, 4 * sizeof(int), {256 * 64, 1, 1}, {}, 1); // get_rows - vk_pipeline_get_rows[GGML_TYPE_F16] = 
ggml_vk_create_pipeline("get_rows_f16", get_rows_f16_len, get_rows_f16_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows[GGML_TYPE_Q4_0] = ggml_vk_create_pipeline("get_rows_q4_0", get_rows_q4_0_len, get_rows_q4_0_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows[GGML_TYPE_Q4_1] = ggml_vk_create_pipeline("get_rows_q4_1", get_rows_q4_1_len, get_rows_q4_1_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows[GGML_TYPE_Q5_0] = ggml_vk_create_pipeline("get_rows_q5_0", get_rows_q5_0_len, get_rows_q5_0_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows[GGML_TYPE_Q5_1] = ggml_vk_create_pipeline("get_rows_q5_1", get_rows_q5_1_len, get_rows_q5_1_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows[GGML_TYPE_Q8_0] = ggml_vk_create_pipeline("get_rows_q8_0", get_rows_q8_0_len, get_rows_q8_0_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_get_rows[GGML_TYPE_F16 ], "get_rows_f16", get_rows_f16_len, get_rows_f16_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_get_rows[GGML_TYPE_Q4_0], "get_rows_q4_0", get_rows_q4_0_len, get_rows_q4_0_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_get_rows[GGML_TYPE_Q4_1], "get_rows_q4_1", get_rows_q4_1_len, get_rows_q4_1_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_get_rows[GGML_TYPE_Q5_0], "get_rows_q5_0", get_rows_q5_0_len, get_rows_q5_0_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_get_rows[GGML_TYPE_Q5_1], "get_rows_q5_1", get_rows_q5_1_len, get_rows_q5_1_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + 
ggml_vk_create_pipeline(ctx, ctx->pipeline_get_rows[GGML_TYPE_Q8_0], "get_rows_q8_0", get_rows_q8_0_len, get_rows_q8_0_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows_f32[GGML_TYPE_F16] = ggml_vk_create_pipeline("get_rows_f16_f32", get_rows_f16_f32_len, get_rows_f16_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows_f32[GGML_TYPE_Q4_0] = ggml_vk_create_pipeline("get_rows_q4_0_f32", get_rows_q4_0_f32_len, get_rows_q4_0_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows_f32[GGML_TYPE_Q4_1] = ggml_vk_create_pipeline("get_rows_q4_1_f32", get_rows_q4_1_f32_len, get_rows_q4_1_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows_f32[GGML_TYPE_Q5_0] = ggml_vk_create_pipeline("get_rows_q5_0_f32", get_rows_q5_0_f32_len, get_rows_q5_0_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows_f32[GGML_TYPE_Q5_1] = ggml_vk_create_pipeline("get_rows_q5_1_f32", get_rows_q5_1_f32_len, get_rows_q5_1_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_get_rows_f32[GGML_TYPE_Q8_0] = ggml_vk_create_pipeline("get_rows_q8_0_f32", get_rows_q8_0_f32_len, get_rows_q8_0_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_get_rows_f32[GGML_TYPE_F16 ], "get_rows_f16_f32", get_rows_f16_f32_len, get_rows_f16_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); /* slot is keyed by the type named first in the shader (f16), like every other get_rows entry */ + ggml_vk_create_pipeline(ctx, ctx->pipeline_get_rows_f32[GGML_TYPE_Q4_0], "get_rows_q4_0_f32", get_rows_q4_0_f32_len, get_rows_q4_0_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_get_rows_f32[GGML_TYPE_Q4_1], "get_rows_q4_1_f32", get_rows_q4_1_f32_len, get_rows_q4_1_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); +
ggml_vk_create_pipeline(ctx, ctx->pipeline_get_rows_f32[GGML_TYPE_Q5_0], "get_rows_q5_0_f32", get_rows_q5_0_f32_len, get_rows_q5_0_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_get_rows_f32[GGML_TYPE_Q5_1], "get_rows_q5_1_f32", get_rows_q5_1_f32_len, get_rows_q5_1_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_get_rows_f32[GGML_TYPE_Q8_0], "get_rows_q8_0_f32", get_rows_q8_0_f32_len, get_rows_q8_0_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_matmul_split_k_reduce = ggml_vk_create_pipeline("split_k_reduce", split_k_reduce_len, split_k_reduce_data, "main", 2, 2 * sizeof(uint32_t), {256, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_matmul_split_k_reduce, "split_k_reduce", split_k_reduce_len, split_k_reduce_data, "main", 2, 2 * sizeof(uint32_t), {256, 1, 1}, {}, 1); - vk_pipeline_mul_mat_vec_p021_f16_f32 = ggml_vk_create_pipeline("mul_mat_vec_p021_f16_f32", mul_mat_vec_p021_f16_f32_len, mul_mat_vec_p021_f16_f32_data, "main", 3, 6 * sizeof(uint32_t), {1, 1, 1}, {}, 1); - vk_pipeline_mul_mat_vec_nc_f16_f32 = ggml_vk_create_pipeline("mul_mat_vec_nc_f16_f32", mul_mat_vec_nc_f16_f32_len, mul_mat_vec_nc_f16_f32_data, "main", 3, 7 * sizeof(uint32_t), {1, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_mul_mat_vec_p021_f16_f32, "mul_mat_vec_p021_f16_f32", mul_mat_vec_p021_f16_f32_len, mul_mat_vec_p021_f16_f32_data, "main", 3, 6 * sizeof(uint32_t), {1, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_mul_mat_vec_nc_f16_f32, "mul_mat_vec_nc_f16_f32", mul_mat_vec_nc_f16_f32_len, mul_mat_vec_nc_f16_f32_data, "main", 3, 7 * sizeof(uint32_t), {1, 1, 1}, {}, 1); - vk_pipeline_norm_f32 = ggml_vk_create_pipeline("norm_f32", norm_f32_len, norm_f32_data, "main", 2, sizeof(vk_op_push_constants), {1, 1, 1}, {}, 1); - vk_pipeline_rms_norm_f32 = 
ggml_vk_create_pipeline("rms_norm_f32", rms_norm_f32_len, rms_norm_f32_data, "main", 2, sizeof(vk_op_push_constants), {1, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_norm_f32, "norm_f32", norm_f32_len, norm_f32_data, "main", 2, sizeof(vk_op_push_constants), {1, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_rms_norm_f32, "rms_norm_f32", rms_norm_f32_len, rms_norm_f32_data, "main", 2, sizeof(vk_op_push_constants), {1, 1, 1}, {}, 1); - vk_pipeline_cpy_f32_f32 = ggml_vk_create_pipeline("cpy_f32_f32", cpy_f32_f32_len, cpy_f32_f32_data, "main", 2, sizeof(vk_op_cpy_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_cpy_f32_f16 = ggml_vk_create_pipeline("cpy_f32_f16", cpy_f32_f16_len, cpy_f32_f16_data, "main", 2, sizeof(vk_op_cpy_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_cpy_f16_f16 = ggml_vk_create_pipeline("cpy_f16_f16", cpy_f16_f16_len, cpy_f16_f16_data, "main", 2, sizeof(vk_op_cpy_push_constants), {512, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_cpy_f32_f32, "cpy_f32_f32", cpy_f32_f32_len, cpy_f32_f32_data, "main", 2, sizeof(vk_op_cpy_push_constants), {512, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_cpy_f32_f16, "cpy_f32_f16", cpy_f32_f16_len, cpy_f32_f16_data, "main", 2, sizeof(vk_op_cpy_push_constants), {512, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_cpy_f16_f16, "cpy_f16_f16", cpy_f16_f16_len, cpy_f16_f16_data, "main", 2, sizeof(vk_op_cpy_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_add_f32 = ggml_vk_create_pipeline("add_f32", add_f32_len, add_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_add_f32, "add_f32", add_f32_len, add_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_mul_f32 = ggml_vk_create_pipeline("mul_f32", mul_f32_len, mul_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_mul_f32, 
"mul_f32", mul_f32_len, mul_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_scale_f32 = ggml_vk_create_pipeline("scale_f32", scale_f32_len, scale_f32_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_scale_f32, "scale_f32", scale_f32_len, scale_f32_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_sqr_f32 = ggml_vk_create_pipeline("sqr_f32", sqr_f32_len, sqr_f32_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_sqr_f32, "sqr_f32", sqr_f32_len, sqr_f32_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_clamp_f32 = ggml_vk_create_pipeline("clamp_f32", clamp_f32_len, clamp_f32_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_clamp_f32, "clamp_f32", clamp_f32_len, clamp_f32_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_gelu_f32 = ggml_vk_create_pipeline("gelu_f32", gelu_f32_len, gelu_f32_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_silu_f32 = ggml_vk_create_pipeline("silu_f32", silu_f32_len, silu_f32_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_relu_f32 = ggml_vk_create_pipeline("relu_f32", relu_f32_len, relu_f32_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_gelu_f32, "gelu_f32", gelu_f32_len, gelu_f32_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_silu_f32, "silu_f32", silu_f32_len, silu_f32_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_relu_f32, "relu_f32", relu_f32_len, relu_f32_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_diag_mask_inf_f32 
= ggml_vk_create_pipeline("diag_mask_inf_f32", diag_mask_inf_f32_len, diag_mask_inf_f32_data, "main", 2, sizeof(vk_op_diag_mask_push_constants), {512, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_diag_mask_inf_f32, "diag_mask_inf_f32", diag_mask_inf_f32_len, diag_mask_inf_f32_data, "main", 2, sizeof(vk_op_diag_mask_push_constants), {512, 1, 1}, {}, 1); - vk_pipeline_soft_max_f32 = ggml_vk_create_pipeline("soft_max_f32", soft_max_f32_len, soft_max_f32_data, "main", 3, sizeof(vk_op_push_constants), {1, 1, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_soft_max_f32, "soft_max_f32", soft_max_f32_len, soft_max_f32_data, "main", 3, sizeof(vk_op_push_constants), {1, 1, 1}, {}, 1); - vk_pipeline_rope_f32 = ggml_vk_create_pipeline("rope_f32", rope_f32_len, rope_f32_data, "main", 3, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1); - vk_pipeline_rope_f16 = ggml_vk_create_pipeline("rope_f16", rope_f16_len, rope_f16_data, "main", 3, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_rope_f32, "rope_f32", rope_f32_len, rope_f32_data, "main", 3, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_rope_f16, "rope_f16", rope_f16_len, rope_f16_data, "main", 3, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1); - vk_pipeline_rope_neox_f32 = ggml_vk_create_pipeline("rope_neox_f32", rope_neox_f32_len, rope_neox_f32_data, "main", 3, sizeof(vk_op_rope_neox_push_constants), {1, 512, 1}, {}, 1); - vk_pipeline_rope_neox_f16 = ggml_vk_create_pipeline("rope_neox_f16", rope_neox_f16_len, rope_neox_f16_data, "main", 3, sizeof(vk_op_rope_neox_push_constants), {1, 512, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_rope_neox_f32, "rope_neox_f32", rope_neox_f32_len, rope_neox_f32_data, "main", 3, sizeof(vk_op_rope_neox_push_constants), {1, 512, 1}, {}, 1); + ggml_vk_create_pipeline(ctx, ctx->pipeline_rope_neox_f16, "rope_neox_f16", rope_neox_f16_len, 
rope_neox_f16_data, "main", 3, sizeof(vk_op_rope_neox_push_constants), {1, 512, 1}, {}, 1); } -void ggml_vk_init() { +static void ggml_vk_print_gpu_info(size_t idx) { + GGML_ASSERT(idx < vk_instance.device_indices.size()); + size_t dev_num = vk_instance.device_indices[idx]; #ifdef GGML_VULKAN_DEBUG - std::cerr << "ggml_vk_init()" << std::endl; + std::cerr << "ggml_vk_print_gpu_info(" << dev_num << ")" << std::endl; #endif - static bool initialized = false; + GGML_ASSERT(vk_instance.initialized); - if (initialized) { - return; + std::vector devices = vk_instance.instance.enumeratePhysicalDevices(); + + if (dev_num >= devices.size()) { + std::cerr << "ggml_vulkan: Device with index " << dev_num << " does not exist." << std::endl; + throw std::runtime_error("Device not found"); } - initialized = true; + vk::PhysicalDevice physical_device = devices[dev_num]; + std::vector ext_props = physical_device.enumerateDeviceExtensionProperties(); - const char* GGML_VULKAN_DEVICE = getenv("GGML_VULKAN_DEVICE"); - int dev_num = (GGML_VULKAN_DEVICE == NULL ? 
0 : atoi(GGML_VULKAN_DEVICE)); + vk::PhysicalDeviceProperties2 props2; + vk::PhysicalDeviceMaintenance3Properties props3; + vk::PhysicalDeviceSubgroupProperties subgroup_props; + props2.pNext = &props3; + props3.pNext = &subgroup_props; + physical_device.getProperties2(&props2); + + const size_t subgroup_size = subgroup_props.subgroupSize; + const bool uma = props2.properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu; + + bool fp16_storage = false; + bool fp16_compute = false; + + for (auto properties : ext_props) { + if (strcmp("VK_KHR_16bit_storage", properties.extensionName) == 0) { + fp16_storage = true; + } else if (strcmp("VK_KHR_shader_float16_int8", properties.extensionName) == 0) { + fp16_compute = true; + } + } + + const char* GGML_VULKAN_DISABLE_F16 = getenv("GGML_VULKAN_DISABLE_F16"); + bool force_disable_f16 = GGML_VULKAN_DISABLE_F16 != nullptr; + + bool fp16 = !force_disable_f16 && fp16_storage && fp16_compute; + + vk::PhysicalDeviceFeatures device_features = physical_device.getFeatures(); + + VkPhysicalDeviceFeatures2 device_features2; + device_features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + device_features2.pNext = nullptr; + device_features2.features = (VkPhysicalDeviceFeatures)device_features; + + VkPhysicalDeviceVulkan11Features vk11_features; + vk11_features.pNext = nullptr; + vk11_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; + device_features2.pNext = &vk11_features; + + VkPhysicalDeviceVulkan12Features vk12_features; + vk12_features.pNext = nullptr; + vk12_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; + vk11_features.pNext = &vk12_features; + + vkGetPhysicalDeviceFeatures2(physical_device, &device_features2); + + fp16 = fp16 && vk12_features.shaderFloat16; + + std::string device_name = props2.properties.deviceName.data(); + std::cerr << GGML_VK_NAME << idx << ": " << device_name << " | uma: " << uma << " | fp16: " << fp16 << " | warp size: " << subgroup_size << 
std::endl; + + if (props2.properties.deviceType == vk::PhysicalDeviceType::eCpu) { + std::cerr << "ggml_vulkan: Warning: Device type is CPU. This is probably not the device you want." << std::endl; + } +} + +void ggml_vk_instance_init() { + if (vk_instance_initialized) { + return; + } +#ifdef GGML_VULKAN_DEBUG + std::cerr << "ggml_vk_instance_init()" << std::endl; +#endif vk::ApplicationInfo app_info{ "ggml-vulkan", 1, nullptr, 0, VK_API_VERSION }; const std::vector layers = { @@ -989,12 +1106,55 @@ void ggml_vk_init() { validation_features.setPNext(nullptr); instance_create_info.setPNext(&validation_features); -std::cerr << "ggml_vulkan: Validation layers enabled" << std::endl; + std::cerr << "ggml_vulkan: Validation layers enabled" << std::endl; #endif - vk_instance = vk::createInstance(instance_create_info); + vk_instance.instance = vk::createInstance(instance_create_info); - vk_device.physical_device = vk_instance.enumeratePhysicalDevices()[dev_num]; - std::vector ext_props = vk_device.physical_device.enumerateDeviceExtensionProperties(); + memset(vk_instance.initialized, 0, sizeof(bool) * GGML_VK_MAX_DEVICES); + + size_t num_available_devices = vk_instance.instance.enumeratePhysicalDevices().size(); + + // Emulate behavior of CUDA_VISIBLE_DEVICES for Vulkan + char * devices_env = getenv("GGML_VK_VISIBLE_DEVICES"); + if (devices_env != nullptr) { + std::string devices(devices_env); + std::replace(devices.begin(), devices.end(), ',', ' '); + + std::stringstream ss(devices); + size_t tmp; + while (ss >> tmp) { + if(tmp >= num_available_devices) { + std::cerr << "ggml_vulkan: Invalid device index " << tmp << " in GGML_VK_VISIBLE_DEVICES." 
<< std::endl; + throw std::runtime_error("Invalid Vulkan device index"); + } + vk_instance.device_indices.push_back(tmp); + } + } else { + vk_instance.device_indices.push_back(0); + } + + vk_instance_initialized = true; +} + +void ggml_vk_init(ggml_backend_vk_context * ctx, size_t idx) { + ggml_vk_instance_init(); /* must run first: it is what fills vk_instance.device_indices; no-op when already initialized */ + GGML_ASSERT(idx < vk_instance.device_indices.size()); + size_t dev_num = vk_instance.device_indices[idx]; +#ifdef GGML_VULKAN_DEBUG + std::cerr << "ggml_vk_init(" << ctx->name << ", " << dev_num << ")" << std::endl; +#endif + + std::vector devices = vk_instance.instance.enumeratePhysicalDevices(); + + if (dev_num >= devices.size()) { + std::cerr << "ggml_vulkan: Device with index " << dev_num << " does not exist." << std::endl; + throw std::runtime_error("Device not found"); + } + + vk_instance.devices[idx] = std::make_shared(); + ctx->device = vk_instance.devices[idx]; + ctx->device.lock()->physical_device = devices[dev_num]; + std::vector ext_props = ctx->device.lock()->physical_device.enumerateDeviceExtensionProperties(); bool maintenance4_support = false; @@ -1014,18 +1174,18 @@ std::cerr << "ggml_vulkan: Validation layers enabled" << std::endl; if (maintenance4_support) { subgroup_props.pNext = &props4; } - vk_device.physical_device.getProperties2(&props2); - vk_device.properties = props2.properties; + ctx->device.lock()->physical_device.getProperties2(&props2); + ctx->device.lock()->properties = props2.properties; if (maintenance4_support) { - vk_device.max_memory_allocation_size = std::min(props3.maxMemoryAllocationSize, props4.maxBufferSize); + ctx->device.lock()->max_memory_allocation_size = std::min(props3.maxMemoryAllocationSize, props4.maxBufferSize); } else { - vk_device.max_memory_allocation_size = props3.maxMemoryAllocationSize; + ctx->device.lock()->max_memory_allocation_size = props3.maxMemoryAllocationSize; } - vk_device.vendor_id = vk_device.properties.vendorID; - vk_device.subgroup_size = subgroup_props.subgroupSize; - vk_device.uma
= vk_device.properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu; + ctx->device.lock()->vendor_id = ctx->device.lock()->properties.vendorID; + ctx->device.lock()->subgroup_size = subgroup_props.subgroupSize; + ctx->device.lock()->uma = ctx->device.lock()->properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu; bool fp16_storage = false; bool fp16_compute = false; @@ -1039,31 +1199,31 @@ std::cerr << "ggml_vulkan: Validation layers enabled" << std::endl; } const char* GGML_VULKAN_DISABLE_F16 = getenv("GGML_VULKAN_DISABLE_F16"); - bool force_disable_f16 = GGML_VULKAN_DISABLE_F16 != NULL; + bool force_disable_f16 = GGML_VULKAN_DISABLE_F16 != nullptr; - vk_device.fp16 = !force_disable_f16 && fp16_storage && fp16_compute; + ctx->device.lock()->fp16 = !force_disable_f16 && fp16_storage && fp16_compute; - std::vector queue_family_props = vk_device.physical_device.getQueueFamilyProperties(); + std::vector queue_family_props = ctx->device.lock()->physical_device.getQueueFamilyProperties(); // Try to find a non-graphics compute queue and transfer-focused queues const uint32_t compute_queue_family_index = ggml_vk_find_queue_family_index(queue_family_props, vk::QueueFlagBits::eCompute, vk::QueueFlagBits::eGraphics, -1, 1); const uint32_t transfer_queue_family_index = ggml_vk_find_queue_family_index(queue_family_props, vk::QueueFlagBits::eTransfer, vk::QueueFlagBits::eCompute | vk::QueueFlagBits::eGraphics, compute_queue_family_index, 1); const float priorities[] = { 1.0f, 1.0f }; - const bool single_queue = compute_queue_family_index == transfer_queue_family_index && queue_family_props[compute_queue_family_index].queueCount == 1; + ctx->device.lock()->single_queue = compute_queue_family_index == transfer_queue_family_index && queue_family_props[compute_queue_family_index].queueCount == 1; std::vector device_queue_create_infos; if (compute_queue_family_index != transfer_queue_family_index) { device_queue_create_infos.push_back({vk::DeviceQueueCreateFlags(), 
compute_queue_family_index, 1, priorities}); device_queue_create_infos.push_back({vk::DeviceQueueCreateFlags(), transfer_queue_family_index, 1, priorities + 1}); - } else if(!single_queue) { + } else if(!ctx->device.lock()->single_queue) { device_queue_create_infos.push_back({vk::DeviceQueueCreateFlags(), compute_queue_family_index, 2, priorities}); } else { device_queue_create_infos.push_back({vk::DeviceQueueCreateFlags(), compute_queue_family_index, 1, priorities}); } vk::DeviceCreateInfo device_create_info; std::vector device_extensions; - vk::PhysicalDeviceFeatures device_features = vk_device.physical_device.getFeatures(); + vk::PhysicalDeviceFeatures device_features = ctx->device.lock()->physical_device.getFeatures(); VkPhysicalDeviceFeatures2 device_features2; device_features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; @@ -1080,13 +1240,13 @@ std::cerr << "ggml_vulkan: Validation layers enabled" << std::endl; vk12_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; vk11_features.pNext = &vk12_features; - vkGetPhysicalDeviceFeatures2(vk_device.physical_device, &device_features2); + vkGetPhysicalDeviceFeatures2(ctx->device.lock()->physical_device, &device_features2); - vk_device.fp16 = vk_device.fp16 && vk12_features.shaderFloat16; + ctx->device.lock()->fp16 = ctx->device.lock()->fp16 && vk12_features.shaderFloat16; if (!vk11_features.storageBuffer16BitAccess) { - std::cerr << "ggml_vulkan: device does not support 16-bit storage" << std::endl; - GGML_ASSERT(false); + std::cerr << "ggml_vulkan: device " << GGML_VK_NAME << idx << " does not support 16-bit storage." 
<< std::endl; + throw std::runtime_error("Unsupported device"); } device_extensions.push_back("VK_KHR_16bit_storage"); @@ -1095,10 +1255,11 @@ std::cerr << "ggml_vulkan: Validation layers enabled" << std::endl; device_extensions.push_back("VK_KHR_shader_non_semantic_info"); #endif - if (vk_device.fp16) { + if (ctx->device.lock()->fp16) { device_extensions.push_back("VK_KHR_shader_float16_int8"); } - std::cerr << "ggml_vulkan: Using " << vk_device.properties.deviceName << " | uma: " << vk_device.uma << " | fp16: " << vk_device.fp16 << " | warp size: " << vk_device.subgroup_size << std::endl; + ctx->device.lock()->name = ctx->device.lock()->properties.deviceName.data(); + device_create_info = { vk::DeviceCreateFlags(), device_queue_create_infos, @@ -1106,28 +1267,32 @@ std::cerr << "ggml_vulkan: Validation layers enabled" << std::endl; device_extensions }; device_create_info.setPNext(&device_features2); - vk_device.device = vk_device.physical_device.createDevice(device_create_info); + ctx->device.lock()->device = ctx->device.lock()->physical_device.createDevice(device_create_info); - vk_device.descriptor_set_mode = VK_DEVICE_DESCRIPTOR_POOL_MODE_UNKNOWN; + ctx->device.lock()->descriptor_set_mode = VK_DEVICE_DESCRIPTOR_POOL_MODE_UNKNOWN; // Shaders - ggml_vk_load_shaders(); + ggml_vk_load_shaders(ctx); // Queues - vk_device.compute_queue = ggml_vk_create_queue(compute_queue_family_index, 0, { vk::PipelineStageFlagBits::eComputeShader | vk::PipelineStageFlagBits::eTransfer }); - if (!single_queue) { + ggml_vk_create_queue(ctx, ctx->device.lock()->compute_queue, compute_queue_family_index, 0, { vk::PipelineStageFlagBits::eComputeShader | vk::PipelineStageFlagBits::eTransfer }); + if (!ctx->device.lock()->single_queue) { const uint32_t transfer_queue_index = compute_queue_family_index == transfer_queue_family_index ? 
1 : 0; - vk_device.transfer_queue = ggml_vk_create_queue(transfer_queue_family_index, transfer_queue_index, { vk::PipelineStageFlagBits::eTransfer }); + ggml_vk_create_queue(ctx, ctx->device.lock()->transfer_queue, transfer_queue_family_index, transfer_queue_index, { vk::PipelineStageFlagBits::eTransfer }); } else { - vk_device.transfer_queue = vk_device.compute_queue; + // TODO: Use pointer or reference to avoid copy + ctx->device.lock()->transfer_queue = ctx->device.lock()->compute_queue; } - vk_fence = vk_device.device.createFence({}); + ctx->fence = ctx->device.lock()->device.createFence({}); - vk_ctx = nullptr; - vk_transfer_ctx = nullptr; + ctx->compute_ctx = nullptr; + ctx->transfer_ctx = nullptr; - vk_disable = false; + ctx->disable = false; + ctx->initialized = true; + + ctx->idx = idx; #ifdef GGML_VULKAN_CHECK_RESULTS const char* skip_checks = getenv("GGML_VULKAN_SKIP_CHECKS"); @@ -1137,7 +1302,7 @@ std::cerr << "ggml_vulkan: Validation layers enabled" << std::endl; #endif } -static vk_pipeline* ggml_vk_get_to_fp16(ggml_type type) { +static vk_pipeline* ggml_vk_get_to_fp16(ggml_backend_vk_context * ctx, ggml_type type) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_get_to_fp16()" << std::endl; #endif @@ -1158,10 +1323,10 @@ static vk_pipeline* ggml_vk_get_to_fp16(ggml_type type) { return nullptr; } - return &vk_pipeline_dequant[type]; + return &ctx->pipeline_dequant[type]; } -static vk_pipeline* ggml_vk_get_dequantize_mul_mat_vec(ggml_type type) { +static vk_pipeline* ggml_vk_get_dequantize_mul_mat_vec(ggml_backend_vk_context * ctx, ggml_type type) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_get_dequantize_mul_mat_vec()" << std::endl; #endif @@ -1182,15 +1347,10 @@ static vk_pipeline* ggml_vk_get_dequantize_mul_mat_vec(ggml_type type) { return nullptr; } - return &vk_pipeline_dequant_mul_mat_vec_f32[type]; + return &ctx->pipeline_dequant_mul_mat_vec_f32[type]; } -// buffer pool for vulkan -#define MAX_VK_BUFFERS 256 - -static vk_buffer 
g_vk_buffer_pool[MAX_VK_BUFFERS]; - -static vk_buffer ggml_vk_pool_malloc(size_t size) { +static vk_buffer ggml_vk_pool_malloc(ggml_backend_vk_context * ctx, size_t size) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_pool_malloc(" << size << ")" << std::endl; #endif @@ -1199,98 +1359,95 @@ static vk_buffer ggml_vk_pool_malloc(size_t size) { int worst_i = -1; size_t worst_size = 0; //largest unused buffer seen so far for (int i = 0; i < MAX_VK_BUFFERS; ++i) { - vk_buffer &b = g_vk_buffer_pool[i]; - if (b.size > 0 && b.size >= size && b.size < best_size) { + vk_buffer &b = ctx->buffer_pool[i]; + if (b != nullptr && b->size >= size && b->size < best_size) { best_i = i; - best_size = b.size; + best_size = b->size; } - if (b.size > 0 && b.size > worst_size) { + if (b != nullptr && b->size > worst_size) { worst_i = i; - worst_size = b.size; + worst_size = b->size; } } if(best_i != -1) { //found the smallest buffer that fits our needs - vk_buffer b = g_vk_buffer_pool[best_i]; - g_vk_buffer_pool[best_i].size = 0; + vk_buffer b = ctx->buffer_pool[best_i]; + ctx->buffer_pool[best_i].reset(); return b; } if(worst_i != -1) { //no buffer that fits our needs, resize largest one to save memory - vk_buffer& b = g_vk_buffer_pool[worst_i]; + vk_buffer& b = ctx->buffer_pool[worst_i]; ggml_vk_destroy_buffer(b); } - return ggml_vk_create_buffer_check(size, vk::MemoryPropertyFlagBits::eDeviceLocal); + return ggml_vk_create_buffer_check(ctx, size, vk::MemoryPropertyFlagBits::eDeviceLocal); } -static void ggml_vk_pool_free(vk_buffer& buffer) { +static void ggml_vk_pool_free(ggml_backend_vk_context * ctx, vk_buffer& buffer) { #ifdef GGML_VULKAN_DEBUG - std::cerr << "ggml_vk_pool_free(" << buffer.size << ")" << std::endl; + std::cerr << "ggml_vk_pool_free(" << buffer->size << ")" << std::endl; #endif for (int i = 0; i < MAX_VK_BUFFERS; ++i) { - vk_buffer& b = g_vk_buffer_pool[i]; - if (b.size == 0) { + vk_buffer& b = ctx->buffer_pool[i]; + if (b == nullptr) { b = buffer; - // Set owning 
queue family index to ignored to avoid synchronization on next use - b.qf_owner = VK_QUEUE_FAMILY_IGNORED; return; } } - fprintf(stderr, "WARNING: vk buffer pool full, increase MAX_VK_BUFFERS\n"); + std::cerr << "ggml_vulkan: WARNING: vk buffer pool full, increase MAX_VK_BUFFERS" << std::endl; ggml_vk_destroy_buffer(buffer); } // Returns an available temporary buffer that may only be used temporarily, it will be reused -static vk_buffer ggml_vk_create_buffer_temp(size_t size) { +static vk_buffer ggml_vk_create_buffer_temp(ggml_backend_vk_context * ctx, size_t size) { // Try to find existing temp buffer with enough capacity - for (auto& buffer : vk_gc.temp_buffers) { - if (buffer.size >= size) { + for (auto& buffer : ctx->gc.temp_buffers) { + if (buffer->size >= size) { return buffer; } } // Otherwise create new buffer - vk_buffer buf = ggml_vk_pool_malloc(size); - vk_gc.temp_buffers.push_back(buf); + vk_buffer buf = ggml_vk_pool_malloc(ctx, size); + ctx->gc.temp_buffers.push_back(buf); return buf; } -static void * ggml_vk_host_malloc(size_t size) { +static void * ggml_vk_host_malloc(ggml_backend_vk_context * ctx, size_t size) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_host_malloc(" << size << ")" << std::endl; #endif - vk_buffer buf = ggml_vk_create_buffer(size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached); + vk_buffer buf = ggml_vk_create_buffer(ctx, size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached); - if(!(buf.memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible)) { + if(!(buf->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible)) { fprintf(stderr, "WARNING: failed to allocate %.2f MB of pinned memory\n", size/1024.0/1024.0); - buf.size = 0; - vk_device.device.freeMemory(buf.device_memory); - vk_device.device.destroyBuffer(buf.buffer); + 
ctx->device.lock()->device.freeMemory(buf->device_memory); + ctx->device.lock()->device.destroyBuffer(buf->buffer); return nullptr; } - vk_pinned_memory.push_back(std::make_tuple(buf.ptr, size, buf)); + ctx->pinned_memory.push_back(std::make_tuple(buf->ptr, size, buf)); - return buf.ptr; + return buf->ptr; } -static void ggml_vk_host_free(void* ptr) { +static void ggml_vk_host_free(ggml_backend_vk_context * ctx, void* ptr) { if (ptr == nullptr) { return; } #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_host_free(" << ptr << ")" << std::endl; #endif - vk_buffer* buf = nullptr; + vk_buffer buf; size_t index; - for (size_t i = 0; i < vk_pinned_memory.size(); i++) { - const uint8_t* addr = (const uint8_t*) std::get<0>(vk_pinned_memory[i]); - const uint8_t* endr = addr + std::get<1>(vk_pinned_memory[i]); + for (size_t i = 0; i < ctx->pinned_memory.size(); i++) { + const uint8_t* addr = (const uint8_t*) std::get<0>(ctx->pinned_memory[i]); + const uint8_t* endr = addr + std::get<1>(ctx->pinned_memory[i]); if (ptr >= addr && ptr < endr) { - buf = &std::get<2>(vk_pinned_memory[i]); + buf = std::get<2>(ctx->pinned_memory[i]); index = i; break; } @@ -1300,28 +1457,28 @@ static void ggml_vk_host_free(void* ptr) { return; } - ggml_vk_destroy_buffer(*buf); + ggml_vk_destroy_buffer(buf); - vk_pinned_memory.erase(vk_pinned_memory.begin() + index); + ctx->pinned_memory.erase(ctx->pinned_memory.begin() + index); } -static void ggml_vk_host_get(const void * ptr, vk_buffer *& buf, size_t& buf_offset) { +static void ggml_vk_host_get(ggml_backend_vk_context * ctx, const void * ptr, vk_buffer& buf, size_t& buf_offset) { buf = nullptr; buf_offset = 0; - for (size_t i = 0; i < vk_pinned_memory.size(); i++) { - const uint8_t* addr = (const uint8_t*) std::get<0>(vk_pinned_memory[i]); - const uint8_t* endr = addr + std::get<1>(vk_pinned_memory[i]); + for (size_t i = 0; i < ctx->pinned_memory.size(); i++) { + const uint8_t* addr = (const uint8_t*) std::get<0>(ctx->pinned_memory[i]); + const 
uint8_t* endr = addr + std::get<1>(ctx->pinned_memory[i]); if (ptr >= addr && ptr < endr) { - buf = &std::get<2>(vk_pinned_memory[i]); + buf = std::get<2>(ctx->pinned_memory[i]); buf_offset = ((const uint8_t *)ptr) - addr; break; } } } -static vk_submission ggml_vk_begin_submission(vk_queue& q, bool one_time = true) { +static vk_submission ggml_vk_begin_submission(ggml_backend_vk_context * ctx, vk_queue& q, bool one_time = true) { vk_submission s; - s.buffer = ggml_vk_create_cmd_buffer(q); + s.buffer = ggml_vk_create_cmd_buffer(ctx, q); if (one_time) { s.buffer.begin({ vk::CommandBufferUsageFlagBits::eOneTimeSubmit }); } else { @@ -1331,7 +1488,7 @@ static vk_submission ggml_vk_begin_submission(vk_queue& q, bool one_time = true) return s; } -static void ggml_vk_dispatch_pipeline(vk_context * ctx, vk_pipeline& pipeline, std::vector&& buffers, size_t push_constant_size, const void* push_constants, std::array elements) { +static void ggml_vk_dispatch_pipeline(ggml_backend_vk_context * ctx, vk_context * subctx, vk_pipeline& pipeline, std::vector&& buffers, size_t push_constant_size, const void* push_constants, std::array elements) { const uint32_t wg0 = CEIL_DIV(elements[0], pipeline.wg_denoms[0]); const uint32_t wg1 = CEIL_DIV(elements[1], pipeline.wg_denoms[1]); const uint32_t wg2 = CEIL_DIV(elements[2], pipeline.wg_denoms[2]); @@ -1344,22 +1501,22 @@ static void ggml_vk_dispatch_pipeline(vk_context * ctx, vk_pipeline& pipeline, s GGML_ASSERT(buffers.size() == pipeline.parameter_count); vk::DescriptorSet& descriptor_set = pipeline.descriptor_sets[pipeline.descriptor_set_idx++]; for (uint32_t i = 0; i < pipeline.parameter_count; i++) { - descriptor_buffer_infos.push_back({buffers[i].buffer.buffer, buffers[i].offset, buffers[i].size}); + descriptor_buffer_infos.push_back({buffers[i].buffer->buffer, buffers[i].offset, buffers[i].size}); } for (uint32_t i = 0; i < pipeline.parameter_count; i++) { write_descriptor_sets.push_back({descriptor_set, i, 0, 1, 
vk::DescriptorType::eStorageBuffer, nullptr, &descriptor_buffer_infos[i]}); } - vk_device.device.updateDescriptorSets(write_descriptor_sets, {}); + ctx->device.lock()->device.updateDescriptorSets(write_descriptor_sets, {}); - ctx->s->buffer.pushConstants(pipeline.layout, vk::ShaderStageFlagBits::eCompute, 0, push_constant_size, push_constants); - ctx->s->buffer.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline.pipeline); - ctx->s->buffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute, + subctx->s->buffer.pushConstants(pipeline.layout, vk::ShaderStageFlagBits::eCompute, 0, push_constant_size, push_constants); + subctx->s->buffer.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline.pipeline); + subctx->s->buffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute, pipeline.layout, 0, { descriptor_set }, {}); - ctx->s->buffer.dispatch(wg0, wg1, wg2); + subctx->s->buffer.dispatch(wg0, wg1, wg2); } static void ggml_vk_end_submission(vk_submission& s, std::vector wait_semaphores, std::vector signal_semaphores) { @@ -1381,16 +1538,16 @@ static void ggml_vk_ctx_end(vk_context * ctx) { ctx->s = nullptr; } -static void ggml_vk_ctx_begin(vk_context * ctx) { +static void ggml_vk_ctx_begin(ggml_backend_vk_context * ctx, vk_context * subctx) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_ctx_begin(" << ctx << ")" << std::endl; #endif - if (ctx->s != nullptr) { - ggml_vk_ctx_end(ctx); + if (subctx->s != nullptr) { + ggml_vk_ctx_end(subctx); } - ctx->seqs.push_back({ ggml_vk_begin_submission(*ctx->q) }); - ctx->s = ctx->seqs[ctx->seqs.size() - 1].data(); + subctx->seqs.push_back({ ggml_vk_begin_submission(ctx, *subctx->q) }); + subctx->s = subctx->seqs[subctx->seqs.size() - 1].data(); } static size_t ggml_vk_align_size(size_t width, size_t align) { @@ -1405,14 +1562,14 @@ static void deferred_memcpy(void * dst, const void * src, size_t size, std::vect } } -static void ensure_sync_staging_buffer(size_t size) { - if (vk_sync_staging.size < size) { - 
ggml_vk_destroy_buffer(vk_sync_staging); - vk_sync_staging = ggml_vk_create_buffer_check(size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached); +static void ggml_vk_ensure_sync_staging_buffer(ggml_backend_vk_context * ctx, size_t size) { + if (ctx->sync_staging == nullptr || ctx->sync_staging->size < size) { + ggml_vk_destroy_buffer(ctx->sync_staging); + ctx->sync_staging = ggml_vk_create_buffer_check(ctx, size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached); } } -static void ggml_vk_buffer_write_nc_async(vk_context * ctx, vk_buffer* dst, size_t offset, const ggml_tensor * tensor, bool sync_staging = false) { +static void ggml_vk_buffer_write_nc_async(ggml_backend_vk_context * ctx, vk_context * subctx, vk_buffer& dst, size_t offset, const ggml_tensor * tensor, bool sync_staging = false) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_buffer_write_nc_async(" << tensor << ")" << std::endl; #endif @@ -1423,9 +1580,9 @@ static void ggml_vk_buffer_write_nc_async(vk_context * ctx, vk_buffer* dst, size GGML_ASSERT(false); } // Check if src is pinned memory - vk_buffer * buf = nullptr; + vk_buffer buf; size_t buf_offset; - ggml_vk_host_get(tensor->data, buf, buf_offset); + ggml_vk_host_get(ctx, tensor->data, buf, buf_offset); const uint64_t ne0 = tensor->ne[0]; const uint64_t ne1 = tensor->ne[1]; @@ -1471,21 +1628,21 @@ static void ggml_vk_buffer_write_nc_async(vk_context * ctx, vk_buffer* dst, size } } - ggml_vk_sync_buffers(ctx); - ctx->s->buffer.copyBuffer(buf->buffer, dst->buffer, slices); + ggml_vk_sync_buffers(subctx); + subctx->s->buffer.copyBuffer(buf->buffer, dst->buffer, slices); return; } // Staging buffer required - vk_buffer * staging = &vk_staging; - size_t staging_offset = vk_staging_offset; + vk_buffer staging = ctx->staging; + size_t staging_offset = ctx->staging_offset; const size_t copy_size = 
ts*ne/bs; - if (vk_staging.size < vk_staging_offset + copy_size) { + if (ctx->staging->size < ctx->staging_offset + copy_size) { if (sync_staging) { // Create temporary larger buffer - ensure_sync_staging_buffer(copy_size); + ggml_vk_ensure_sync_staging_buffer(ctx, copy_size); - staging = &vk_sync_staging; + staging = ctx->sync_staging; staging_offset = 0; } else { GGML_ASSERT(false); @@ -1494,23 +1651,23 @@ static void ggml_vk_buffer_write_nc_async(vk_context * ctx, vk_buffer* dst, size VkBufferCopy buf_copy{ staging_offset, offset, copy_size }; - ggml_vk_sync_buffers(ctx); - vkCmdCopyBuffer(ctx->s->buffer, staging->buffer, dst->buffer, 1, &buf_copy); + ggml_vk_sync_buffers(subctx); + vkCmdCopyBuffer(subctx->s->buffer, staging->buffer, dst->buffer, 1, &buf_copy); for (uint64_t i3 = 0; i3 < ne3; i3++) { for (uint64_t i2 = 0; i2 < ne2; i2++) { // Find longest contiguous slice if (ne1*nb1 == dstnb2) { - deferred_memcpy((uint8_t *)staging->ptr + staging_offset + i3*dstnb3 + i2*dstnb2, (const uint8_t *) tensor->data + buf_offset + i3*nb3 + i2*nb2, dstnb2, &ctx->in_memcpys); + deferred_memcpy((uint8_t *)staging->ptr + staging_offset + i3*dstnb3 + i2*dstnb2, (const uint8_t *) tensor->data + buf_offset + i3*nb3 + i2*nb2, dstnb2, &subctx->in_memcpys); } else { for (uint64_t i1 = 0; i1 < ne1; i1++) { if (ne0*nb0/bs == dstnb1) { - deferred_memcpy((uint8_t *)staging->ptr + staging_offset + i3*dstnb3 + i2*dstnb2 + i1*dstnb1, (const uint8_t *) tensor->data + buf_offset + i3*nb3 + i2*nb2 + i1*nb1, dstnb1, &ctx->in_memcpys); + deferred_memcpy((uint8_t *)staging->ptr + staging_offset + i3*dstnb3 + i2*dstnb2 + i1*dstnb1, (const uint8_t *) tensor->data + buf_offset + i3*nb3 + i2*nb2 + i1*nb1, dstnb1, &subctx->in_memcpys); } else { const uint64_t s_off = buf_offset + i3*nb3 + i2*nb2 + i1*nb1; const uint64_t d_off = staging_offset + i3*dstnb3 + i2*dstnb2 + i1*dstnb1; for (uint64_t i0 = 0; i0 < ne0; i0++) { - deferred_memcpy((uint8_t *)staging->ptr + d_off + i0*dstnb0, (const uint8_t 
*) tensor->data + s_off + i0*nb0, dstnb0, &ctx->in_memcpys); + deferred_memcpy((uint8_t *)staging->ptr + d_off + i0*dstnb0, (const uint8_t *) tensor->data + s_off + i0*nb0, dstnb0, &subctx->in_memcpys); } } } @@ -1519,19 +1676,22 @@ static void ggml_vk_buffer_write_nc_async(vk_context * ctx, vk_buffer* dst, size } } -static void ggml_vk_buffer_write_2d_async(vk_context * ctx, vk_buffer* dst, size_t offset, const void * src, size_t spitch, size_t width, size_t height, bool sync_staging = false) { +static void ggml_vk_buffer_write_2d_async(ggml_backend_vk_context * ctx, vk_context * subctx, vk_buffer& dst, size_t offset, const void * src, size_t spitch, size_t width, size_t height, bool sync_staging = false) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_buffer_write_2d_async(" << width << ", " << height << ")" << std::endl; #endif + // Make sure ctx owns the buffer + GGML_ASSERT(dst->ctx == ctx); + // Buffer is already mapped if(dst->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible) { std::cerr << "ggml_vulkan: buffer_write_async dst buffer is host_visible. Use synchronous write." 
<< std::endl; GGML_ASSERT(false); } // Check if src is pinned memory - vk_buffer * buf = nullptr; + vk_buffer buf = nullptr; size_t buf_offset; - ggml_vk_host_get(src, buf, buf_offset); + ggml_vk_host_get(ctx, src, buf, buf_offset); if (buf != nullptr) { // Memory is pinned, use as staging buffer @@ -1550,8 +1710,8 @@ static void ggml_vk_buffer_write_2d_async(vk_context * ctx, vk_buffer* dst, size } } - ggml_vk_sync_buffers(ctx); - ctx->s->buffer.copyBuffer(buf->buffer, dst->buffer, slices); + ggml_vk_sync_buffers(subctx); + subctx->s->buffer.copyBuffer(buf->buffer, dst->buffer, slices); return; } #ifdef GGML_VULKAN_DEBUG @@ -1559,14 +1719,14 @@ static void ggml_vk_buffer_write_2d_async(vk_context * ctx, vk_buffer* dst, size #endif // Staging buffer required - vk_buffer * staging = &vk_staging; - size_t staging_offset = vk_staging_offset; + vk_buffer staging = ctx->staging; + size_t staging_offset = ctx->staging_offset; const size_t copy_size = width*height; - if (vk_staging.size < vk_staging_offset + copy_size) { + if (ctx->staging == nullptr || ctx->staging->size < ctx->staging_offset + copy_size) { if (sync_staging) { - ensure_sync_staging_buffer(copy_size); + ggml_vk_ensure_sync_staging_buffer(ctx, copy_size); - staging = &vk_sync_staging; + staging = ctx->sync_staging; staging_offset = 0; } else { GGML_ASSERT(false); @@ -1578,26 +1738,26 @@ static void ggml_vk_buffer_write_2d_async(vk_context * ctx, vk_buffer* dst, size offset, copy_size}; - ggml_vk_sync_buffers(ctx); - vkCmdCopyBuffer(ctx->s->buffer, staging->buffer, dst->buffer, 1, &buf_copy); + ggml_vk_sync_buffers(subctx); + vkCmdCopyBuffer(subctx->s->buffer, staging->buffer, dst->buffer, 1, &buf_copy); if (width == spitch) { - deferred_memcpy((uint8_t *)staging->ptr + staging_offset, src, width * height, &ctx->in_memcpys); + deferred_memcpy((uint8_t *)staging->ptr + staging_offset, src, width * height, &subctx->in_memcpys); } else { for (size_t i = 0; i < height; i++) { - deferred_memcpy((uint8_t 
*)staging->ptr + staging_offset + i * width, (const uint8_t *) src + i * spitch, width, &ctx->in_memcpys); + deferred_memcpy((uint8_t *)staging->ptr + staging_offset + i * width, (const uint8_t *) src + i * spitch, width, &subctx->in_memcpys); } } } -static void ggml_vk_buffer_write_async(vk_context * ctx, vk_buffer* dst, size_t offset, const void * src, size_t size, bool sync_staging = false) { +static void ggml_vk_buffer_write_async(ggml_backend_vk_context * ctx, vk_context * subctx, vk_buffer& dst, size_t offset, const void * src, size_t size, bool sync_staging = false) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_buffer_write_async(" << size << ")" << std::endl; #endif - return ggml_vk_buffer_write_2d_async(ctx, dst, offset, src, size, size, 1, sync_staging); + return ggml_vk_buffer_write_2d_async(ctx, subctx, dst, offset, src, size, size, 1, sync_staging); } -static void ggml_vk_buffer_write_2d(vk_buffer* dst, size_t offset, const void * src, size_t spitch, size_t width, size_t height) { +static void ggml_vk_buffer_write_2d(ggml_backend_vk_context * ctx, vk_buffer& dst, size_t offset, const void * src, size_t spitch, size_t width, size_t height) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_buffer_write_2d(" << width << ", " << height << ")" << std::endl; #endif @@ -1609,39 +1769,42 @@ static void ggml_vk_buffer_write_2d(vk_buffer* dst, size_t offset, const void * memcpy((uint8_t *)dst->ptr + offset + i * width, (const uint8_t *) src + i * spitch, width); } } else { - vk_context * ctx = ggml_vk_create_context(vk_device.transfer_queue); - ggml_vk_ctx_begin(ctx); - ggml_vk_buffer_write_2d_async(ctx, dst, offset, src, spitch, width, height, true); - ggml_vk_ctx_end(ctx); + vk_context * subctx = ggml_vk_create_context(ctx, ctx->device.lock()->transfer_queue); + ggml_vk_ctx_begin(ctx, subctx); + ggml_vk_buffer_write_2d_async(ctx, subctx, dst, offset, src, spitch, width, height, true); + ggml_vk_ctx_end(subctx); - for (auto& cpy : ctx->in_memcpys) { + for 
(auto& cpy : subctx->in_memcpys) { memcpy(cpy.dst, cpy.src, cpy.n); } - ggml_vk_submit(ctx, vk_fence); - VK_CHECK(vk_device.device.waitForFences({ vk_fence }, true, UINT64_MAX), "vk_buffer_write_2d waitForFences"); - vk_device.device.resetFences({ vk_fence }); + ggml_vk_submit(subctx, ctx->fence); + VK_CHECK(ctx->device.lock()->device.waitForFences({ ctx->fence }, true, UINT64_MAX), "vk_buffer_write_2d waitForFences"); + ctx->device.lock()->device.resetFences({ ctx->fence }); } } -static void ggml_vk_buffer_write(vk_buffer* dst, size_t offset, const void * src, size_t size) { +static void ggml_vk_buffer_write(ggml_backend_vk_context * ctx, vk_buffer& dst, size_t offset, const void * src, size_t size) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_buffer_write(" << size << ")" << std::endl; #endif - ggml_vk_buffer_write_2d(dst, offset, src, 0, size, 1); + ggml_vk_buffer_write_2d(ctx, dst, offset, src, 0, size, 1); } -static void ggml_vk_buffer_read_2d_async(vk_context * ctx, vk_buffer* src, size_t offset, void * dst, size_t spitch, size_t dpitch, size_t width, size_t height, bool sync_staging = false) { +static void ggml_vk_buffer_read_2d_async(ggml_backend_vk_context * ctx, vk_context * subctx, vk_buffer& src, size_t offset, void * dst, size_t spitch, size_t dpitch, size_t width, size_t height, bool sync_staging = false) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_buffer_read_2d_async(offset=" << offset << ", width=" << width << ", height=" << height << ")" << std::endl; #endif GGML_ASSERT(width > 0); GGML_ASSERT(height > 0); - GGML_ASSERT(src->size > 0); + GGML_ASSERT(src != nullptr); + // Make sure ctx owns the buffer + GGML_ASSERT(src->ctx == ctx); + // Check if dst is pinned memory - vk_buffer * buf = nullptr; + vk_buffer buf = nullptr; size_t buf_offset; - ggml_vk_host_get(dst, buf, buf_offset); + ggml_vk_host_get(ctx, dst, buf, buf_offset); std::vector slices(1); if (width == spitch && width == dpitch) { @@ -1660,8 +1823,8 @@ static void 
ggml_vk_buffer_read_2d_async(vk_context * ctx, vk_buffer* src, size_ if (buf != nullptr) { // Memory is pinned, use as staging buffer - ggml_vk_sync_buffers(ctx); - ctx->s->buffer.copyBuffer(src->buffer, buf->buffer, slices); + ggml_vk_sync_buffers(subctx); + subctx->s->buffer.copyBuffer(src->buffer, buf->buffer, slices); return; } @@ -1670,30 +1833,30 @@ static void ggml_vk_buffer_read_2d_async(vk_context * ctx, vk_buffer* src, size_ #endif // Fall back to staging buffer - vk_buffer * staging = &vk_staging; + vk_buffer staging = ctx->staging; const size_t copy_size = dpitch * height; - if (vk_staging.size < vk_staging_offset + copy_size) { + if (ctx->staging == nullptr || ctx->staging->size < ctx->staging_offset + copy_size) { if (sync_staging) { // Create temporary larger buffer - ensure_sync_staging_buffer(copy_size); + ggml_vk_ensure_sync_staging_buffer(ctx, copy_size); - staging = &vk_sync_staging; + staging = ctx->sync_staging; } else { GGML_ASSERT(false); } } - ggml_vk_sync_buffers(ctx); - ctx->s->buffer.copyBuffer(src->buffer, staging->buffer, slices); + ggml_vk_sync_buffers(subctx); + subctx->s->buffer.copyBuffer(src->buffer, staging->buffer, slices); - deferred_memcpy(dst, staging->ptr, copy_size, &ctx->out_memcpys); + deferred_memcpy(dst, staging->ptr, copy_size, &subctx->out_memcpys); } -static void ggml_vk_buffer_read_async(vk_context * ctx, vk_buffer* src, size_t offset, void * dst, size_t size, bool sync_staging = false) { - return ggml_vk_buffer_read_2d_async(ctx, src, offset, dst, size, size, size, 1, sync_staging); +static void ggml_vk_buffer_read_async(ggml_backend_vk_context * ctx, vk_context * subctx, vk_buffer& src, size_t offset, void * dst, size_t size, bool sync_staging = false) { + return ggml_vk_buffer_read_2d_async(ctx, subctx, src, offset, dst, size, size, size, 1, sync_staging); } -static void ggml_vk_buffer_read(vk_buffer* src, size_t offset, void * dst, size_t size) { +static void ggml_vk_buffer_read(ggml_backend_vk_context * ctx, 
vk_buffer& src, size_t offset, void * dst, size_t size) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_buffer_read(" << offset << ", " << size << ")" << std::endl; #endif @@ -1702,61 +1865,88 @@ static void ggml_vk_buffer_read(vk_buffer* src, size_t offset, void * dst, size_ memcpy(dst, (uint8_t *) src->ptr + offset, size); } else { - vk_context * ctx = ggml_vk_create_context(vk_device.transfer_queue); - ggml_vk_ctx_begin(ctx); - ggml_vk_buffer_read_async(ctx, src, offset, dst, size, true); - ggml_vk_ctx_end(ctx); + vk_context * subctx = ggml_vk_create_context(ctx, ctx->device.lock()->transfer_queue); + ggml_vk_ctx_begin(ctx, subctx); + ggml_vk_buffer_read_async(ctx, subctx, src, offset, dst, size, true); + ggml_vk_ctx_end(subctx); - ggml_vk_submit(ctx, vk_fence); - VK_CHECK(vk_device.device.waitForFences({ vk_fence }, true, UINT64_MAX), "vk_buffer_read waitForFences"); - vk_device.device.resetFences({ vk_fence }); + ggml_vk_submit(subctx, ctx->fence); + VK_CHECK(ctx->device.lock()->device.waitForFences({ ctx->fence }, true, UINT64_MAX), "vk_buffer_read waitForFences"); + ctx->device.lock()->device.resetFences({ ctx->fence }); - for (auto& cpy : ctx->out_memcpys) { + for (auto& cpy : subctx->out_memcpys) { memcpy(cpy.dst, cpy.src, cpy.n); } } } -static void ggml_vk_buffer_copy_async(vk_context * ctx, vk_buffer * dst, size_t dst_offset, vk_buffer * src, size_t src_offset, size_t size) { +static void ggml_vk_buffer_copy_async(vk_context * ctx, vk_buffer& dst, size_t dst_offset, vk_buffer& src, size_t src_offset, size_t size) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_buffer_copy_async(" << size << ")" << std::endl; #endif + // Make sure both buffers are on same ctx + GGML_ASSERT(src->ctx == dst->ctx); + VkBufferCopy bc{ src_offset, dst_offset, size }; vkCmdCopyBuffer(ctx->s->buffer, src->buffer, dst->buffer, 1, &bc); } -static void ggml_vk_buffer_copy(vk_buffer * dst, size_t dst_offset, vk_buffer * src, size_t src_offset, size_t size) { +static void 
ggml_vk_buffer_copy(vk_buffer& dst, size_t dst_offset, vk_buffer& src, size_t src_offset, size_t size) { + if (src->ctx == dst->ctx) { #ifdef GGML_VULKAN_DEBUG - std::cerr << "ggml_vk_buffer_copy(" << size << ")" << std::endl; + std::cerr << "ggml_vk_buffer_copy(SINGLE_DEVICE, " << size << ")" << std::endl; #endif - VkBufferCopy bc{ src_offset, dst_offset, size }; + // Copy within the device + ggml_backend_vk_context * ctx = src->ctx; - vk_context * ctx = ggml_vk_create_context(vk_device.transfer_queue); - ggml_vk_ctx_begin(ctx); - vkCmdCopyBuffer(ctx->s->buffer, src->buffer, dst->buffer, 1, &bc); - ggml_vk_buffer_copy_async(ctx, dst, dst_offset, src, src_offset, size); - ggml_vk_ctx_end(ctx); - ggml_vk_submit(ctx, vk_fence); - VK_CHECK(vk_device.device.waitForFences({ vk_fence }, true, UINT64_MAX), "vk_buffer_copy waitForFences"); - vk_device.device.resetFences({ vk_fence }); + VkBufferCopy bc{ src_offset, dst_offset, size }; + + vk_context * subctx = ggml_vk_create_context(ctx, ctx->device.lock()->transfer_queue); + ggml_vk_ctx_begin(ctx, subctx); + ggml_vk_buffer_copy_async(subctx, dst, dst_offset, src, src_offset, size); + ggml_vk_ctx_end(subctx); + ggml_vk_submit(subctx, ctx->fence); + VK_CHECK(ctx->device.lock()->device.waitForFences({ ctx->fence }, true, UINT64_MAX), "vk_buffer_copy waitForFences"); + ctx->device.lock()->device.resetFences({ ctx->fence }); + } else { +#ifdef GGML_VULKAN_DEBUG + std::cerr << "ggml_vk_buffer_copy(MULTI_DEVICE, " << size << ")" << std::endl; +#endif + // Copy device to device + ggml_backend_vk_context * src_ctx = src->ctx; + ggml_backend_vk_context * dst_ctx = dst->ctx; + + ggml_vk_ensure_sync_staging_buffer(src_ctx, size); + ggml_vk_ensure_sync_staging_buffer(dst_ctx, size); + + // Copy to src staging buffer + ggml_vk_buffer_copy(src_ctx->sync_staging, 0, src, src_offset, size); + // memcpy to dst staging buffer + memcpy(dst_ctx->sync_staging->ptr, src_ctx->sync_staging->ptr, size); + // Copy to dst buffer + 
ggml_vk_buffer_copy(dst, dst_offset, dst_ctx->sync_staging, 0, size); + } } -static void ggml_vk_buffer_memset(vk_buffer* dst, size_t offset, uint32_t c, size_t size) { +static void ggml_vk_buffer_memset(ggml_backend_vk_context * ctx, vk_buffer& dst, size_t offset, uint32_t c, size_t size) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_buffer_memset(" << offset << ", " << c << ", " << size << ")" << std::endl; #endif - vk_context * ctx = ggml_vk_create_context(vk_device.transfer_queue); - ggml_vk_ctx_begin(ctx); - ctx->s->buffer.fillBuffer(dst->buffer, offset, size, c); - ggml_vk_ctx_end(ctx); + // Make sure ctx owns the buffer + GGML_ASSERT(dst->ctx == ctx); - ggml_vk_submit(ctx, vk_fence); - VK_CHECK(vk_device.device.waitForFences({ vk_fence }, true, UINT64_MAX), "vk_memset waitForFences"); - vk_device.device.resetFences({ vk_fence }); + vk_context * subctx = ggml_vk_create_context(ctx, ctx->device.lock()->transfer_queue); + ggml_vk_ctx_begin(ctx, subctx); + subctx->s->buffer.fillBuffer(dst->buffer, offset, size, c); + ggml_vk_ctx_end(subctx); + + ggml_vk_submit(subctx, ctx->fence); + VK_CHECK(ctx->device.lock()->device.waitForFences({ ctx->fence }, true, UINT64_MAX), "vk_memset waitForFences"); + ctx->device.lock()->device.resetFences({ ctx->fence }); } -static void ggml_vk_h2d_tensor_2d(vk_context * ctx, vk_buffer * dst, size_t offset, const ggml_tensor * src, uint64_t i3, uint64_t i2, uint64_t i1) { +static void ggml_vk_h2d_tensor_2d(ggml_backend_vk_context * ctx, vk_context * subctx, vk_buffer& dst, size_t offset, const ggml_tensor * src, uint64_t i3, uint64_t i2, uint64_t i1) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_h2d_tensor_2d(dst=" << dst << ", offset=" << offset << ", src=" << src << ", i3=" << i3 << ", i2=" << i2 << ", i1=" << i1 << ")" << std::endl; #endif @@ -1773,20 +1963,20 @@ static void ggml_vk_h2d_tensor_2d(vk_context * ctx, vk_buffer * dst, size_t offs const void * x = (const void *) ((const char *) src->data + i2*nb2 + i3*nb3); if 
(nb0 == ts && nb1 == row_length) { - return ggml_vk_buffer_write_async(ctx, dst, offset, x, i1*nb1); + return ggml_vk_buffer_write_async(ctx, subctx, dst, offset, x, i1*nb1); } if (nb0 == ts && (i1 == ne1 || !ggml_is_permuted(src))) { - return ggml_vk_buffer_write_2d_async(ctx, dst, offset, x, nb1, row_length, i1); + return ggml_vk_buffer_write_2d_async(ctx, subctx, dst, offset, x, nb1, row_length, i1); } GGML_ASSERT(i3 == 0); GGML_ASSERT(i2 == 0); GGML_ASSERT(i1 == (uint64_t) ggml_nrows(src)); - return ggml_vk_buffer_write_nc_async(ctx, dst, offset, src); + return ggml_vk_buffer_write_nc_async(ctx, subctx, dst, offset, src); } -static void ggml_vk_d2h_tensor_2d(vk_context * ctx, vk_buffer * src, size_t offset, const ggml_tensor * dst) { +static void ggml_vk_d2h_tensor_2d(ggml_backend_vk_context * ctx, vk_context * subctx, vk_buffer& src, size_t offset, const ggml_tensor * dst) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_d2h_tensor_2d()" << std::endl; #endif @@ -1804,10 +1994,10 @@ static void ggml_vk_d2h_tensor_2d(vk_context * ctx, vk_buffer * src, size_t offs const size_t row_length = ts*ne0/bs; if (ggml_is_contiguous(dst)) { - return ggml_vk_buffer_read_async(ctx, src, offset, dst->data, ne1*nb1*ne2*ne3); + return ggml_vk_buffer_read_async(ctx, subctx, src, offset, dst->data, ne1*nb1*ne2*ne3); } if (nb0 == ts) { - return ggml_vk_buffer_read_2d_async(ctx, src, offset, dst->data, nb1, nb1, row_length, ne1*ne2*ne3); + return ggml_vk_buffer_read_2d_async(ctx, subctx, src, offset, dst->data, nb1, nb1, row_length, ne1*ne2*ne3); } GGML_ASSERT(false); } @@ -1829,89 +2019,89 @@ static uint32_t ggml_vk_guess_split_k(int m, int n, int k) { return 1; } -static uint32_t ggml_vk_guess_matmul_pipeline_align(int m, int n) { +static uint32_t ggml_vk_guess_matmul_pipeline_align(ggml_backend_vk_context * ctx, int m, int n) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_guess_matmul_pipeline_align(" << m << ", " << n << ")" << std::endl; #endif if (m <= 32 || n <= 32) { - 
return vk_pipeline_matmul_f32_aligned_s.align; + return ctx->pipeline_matmul_f32_aligned_s.align; } - if (vk_device.subgroup_size == 64 || m <= 64 || n <= 64) { - return vk_pipeline_matmul_f32_aligned_m.align; + if (ctx->device.lock()->subgroup_size == 64 || m <= 64 || n <= 64) { + return ctx->pipeline_matmul_f32_aligned_m.align; } - return vk_pipeline_matmul_f32_aligned_l.align; + return ctx->pipeline_matmul_f32_aligned_l.align; } -static vk_pipeline* ggml_vk_guess_matmul_pipeline(bool bit16_x, bool bit16_y, int m, int n, bool aligned) { +static vk_pipeline* ggml_vk_guess_matmul_pipeline(ggml_backend_vk_context * ctx, bool bit16_x, bool bit16_y, int m, int n, bool aligned) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_guess_matmul_pipeline(" << bit16_x << ", " << bit16_y << ", " << m << ", " << n << ", " << aligned << ")"; #endif if (bit16_x && bit16_y) { - if (vk_device.vendor_id == VK_VENDOR_ID_INTEL || m <= 32 || n <= 32) { + if (ctx->device.lock()->vendor_id == VK_VENDOR_ID_INTEL || m <= 32 || n <= 32) { #ifdef GGML_VULKAN_DEBUG std::cerr << " S" << std::endl; #endif - return aligned ? &vk_pipeline_matmul_f16_aligned_s : &vk_pipeline_matmul_f16_s; + return aligned ? &ctx->pipeline_matmul_f16_aligned_s : &ctx->pipeline_matmul_f16_s; } - if (vk_device.subgroup_size == 64 || m <= 64 || n <= 64) { + if (ctx->device.lock()->subgroup_size == 64 || m <= 64 || n <= 64) { #ifdef GGML_VULKAN_DEBUG std::cerr << " M" << std::endl; #endif - return aligned ? &vk_pipeline_matmul_f16_aligned_m : &vk_pipeline_matmul_f16_m; + return aligned ? &ctx->pipeline_matmul_f16_aligned_m : &ctx->pipeline_matmul_f16_m; } #ifdef GGML_VULKAN_DEBUG std::cerr << " L" << std::endl; #endif - return aligned ? &vk_pipeline_matmul_f16_aligned_l : &vk_pipeline_matmul_f16_l; + return aligned ? 
&ctx->pipeline_matmul_f16_aligned_l : &ctx->pipeline_matmul_f16_l; } if (bit16_x && !bit16_y) { - if (vk_device.vendor_id == VK_VENDOR_ID_INTEL || m <= 32 || n <= 32) { + if (ctx->device.lock()->vendor_id == VK_VENDOR_ID_INTEL || m <= 32 || n <= 32) { #ifdef GGML_VULKAN_DEBUG std::cerr << " S" << std::endl; #endif - return aligned ? &vk_pipeline_matmul_f16_f32_aligned_s : &vk_pipeline_matmul_f16_f32_s; + return aligned ? &ctx->pipeline_matmul_f16_f32_aligned_s : &ctx->pipeline_matmul_f16_f32_s; } - if (vk_device.subgroup_size == 64 || m <= 64 || n <= 64) { + if (ctx->device.lock()->subgroup_size == 64 || m <= 64 || n <= 64) { #ifdef GGML_VULKAN_DEBUG std::cerr << " M" << std::endl; #endif - return aligned ? &vk_pipeline_matmul_f16_f32_aligned_m : &vk_pipeline_matmul_f16_f32_m; + return aligned ? &ctx->pipeline_matmul_f16_f32_aligned_m : &ctx->pipeline_matmul_f16_f32_m; } #ifdef GGML_VULKAN_DEBUG std::cerr << " L" << std::endl; #endif - return aligned ? &vk_pipeline_matmul_f16_f32_aligned_l : &vk_pipeline_matmul_f16_f32_l; + return aligned ? &ctx->pipeline_matmul_f16_f32_aligned_l : &ctx->pipeline_matmul_f16_f32_l; } if (!bit16_x && bit16_y) { GGML_ASSERT(false); } - if (vk_device.vendor_id == VK_VENDOR_ID_INTEL || m <= 32 || n <= 32) { + if (ctx->device.lock()->vendor_id == VK_VENDOR_ID_INTEL || m <= 32 || n <= 32) { #ifdef GGML_VULKAN_DEBUG std::cerr << " S" << std::endl; #endif - return aligned ? &vk_pipeline_matmul_f32_aligned_s : &vk_pipeline_matmul_f32_s; + return aligned ? &ctx->pipeline_matmul_f32_aligned_s : &ctx->pipeline_matmul_f32_s; } - if (vk_device.subgroup_size == 64 || m <= 64 || n <= 64) { + if (ctx->device.lock()->subgroup_size == 64 || m <= 64 || n <= 64) { #ifdef GGML_VULKAN_DEBUG std::cerr << " M" << std::endl; #endif - return aligned ? &vk_pipeline_matmul_f32_aligned_m : &vk_pipeline_matmul_f32_m; + return aligned ? 
&ctx->pipeline_matmul_f32_aligned_m : &ctx->pipeline_matmul_f32_m; } #ifdef GGML_VULKAN_DEBUG std::cerr << " L" << std::endl; #endif - return aligned ? &vk_pipeline_matmul_f32_aligned_l : &vk_pipeline_matmul_f32_l; + return aligned ? &ctx->pipeline_matmul_f32_aligned_l : &ctx->pipeline_matmul_f32_l; } -static void ggml_vk_matmul(vk_context * ctx, vk_pipeline& pipeline, vk_subbuffer&& a, vk_subbuffer&& b, vk_subbuffer&& d, vk_subbuffer&& split_k_buffer, uint32_t m, uint32_t n, uint32_t k, uint32_t stride_a, uint32_t stride_b, uint32_t stride_d, uint32_t split_k, uint32_t batch, uint32_t ne02, uint32_t ne12, uint32_t broadcast2, uint32_t broadcast3, uint32_t batch_stride_a, uint32_t batch_stride_b, uint32_t batch_stride_d) { +static void ggml_vk_matmul(ggml_backend_vk_context * ctx, vk_context * subctx, vk_pipeline& pipeline, vk_subbuffer&& a, vk_subbuffer&& b, vk_subbuffer&& d, vk_subbuffer&& split_k_buffer, uint32_t m, uint32_t n, uint32_t k, uint32_t stride_a, uint32_t stride_b, uint32_t stride_d, uint32_t split_k, uint32_t batch, uint32_t ne02, uint32_t ne12, uint32_t broadcast2, uint32_t broadcast3, uint32_t batch_stride_a, uint32_t batch_stride_b, uint32_t batch_stride_d) { #ifdef GGML_VULKAN_DEBUG - std::cerr << "ggml_vk_matmul(a: (" << a.buffer.buffer << ", " << a.offset << ", " << a.size << "), b: (" << b.buffer.buffer << ", " << b.offset << ", " << b.size << "), c: (" << d.buffer.buffer << ", " << d.offset << ", " << d.size << "), split_k: (" << split_k_buffer.buffer.buffer << ", " << split_k_buffer.offset << ", " << split_k_buffer.size << "), m: " << m << ", n: " << n << ", k: " << k << ", stride_a: " << stride_a << ", stride_b: " << stride_b << ", stride_d: " << stride_d << ", split_k: " << split_k << ", batch: " << batch << ", ne02: " << ne02 << ", ne12: " << ne12 << ", broadcast2: " << broadcast2 << ", broadcast3: " << broadcast3 << ", batch_stride_a: " << batch_stride_a << ", batch_stride_b: " << batch_stride_b << ", batch_stride_d: " << batch_stride_d 
<< ")" << std::endl; + std::cerr << "ggml_vk_matmul(a: (" << a.buffer->buffer << ", " << a.offset << ", " << a.size << "), b: (" << b.buffer->buffer << ", " << b.offset << ", " << b.size << "), c: (" << d.buffer->buffer << ", " << d.offset << ", " << d.size << "), split_k: (" << split_k_buffer.buffer->buffer << ", " << split_k_buffer.offset << ", " << split_k_buffer.size << "), m: " << m << ", n: " << n << ", k: " << k << ", stride_a: " << stride_a << ", stride_b: " << stride_b << ", stride_d: " << stride_d << ", split_k: " << split_k << ", batch: " << batch << ", ne02: " << ne02 << ", ne12: " << ne12 << ", broadcast2: " << broadcast2 << ", broadcast3: " << broadcast3 << ", batch_stride_a: " << batch_stride_a << ", batch_stride_b: " << batch_stride_b << ", batch_stride_d: " << batch_stride_d << ")" << std::endl; #endif - ggml_vk_sync_buffers(ctx); + ggml_vk_sync_buffers(subctx); if (split_k == 1) { const std::array pc = { m, n, k, stride_a, stride_b, stride_d, k, ne02, ne12, broadcast2, broadcast3, batch_stride_a, batch_stride_b, batch_stride_d }; - ggml_vk_dispatch_pipeline(ctx, pipeline, { a, b, d }, pc.size() * sizeof(uint32_t), pc.data(), { m, n, batch }); + ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { a, b, d }, pc.size() * sizeof(uint32_t), pc.data(), { m, n, batch }); return; } @@ -1919,10 +2109,10 @@ static void ggml_vk_matmul(vk_context * ctx, vk_pipeline& pipeline, vk_subbuffer const std::array pc1 = { m, n, k, stride_a, stride_b, stride_d, CEIL_DIV(k, split_k), ne02, ne12, broadcast2, broadcast3, batch_stride_a, batch_stride_b, batch_stride_d }; // Make sure enough workgroups get assigned for split k to work - ggml_vk_dispatch_pipeline(ctx, pipeline, { a, b, split_k_buffer }, pc1.size() * sizeof(uint32_t), pc1.data(), { (CEIL_DIV(m, pipeline.wg_denoms[0]) * pipeline.wg_denoms[0]) * split_k, n, batch }); - ggml_vk_sync_buffers(ctx); + ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { a, b, split_k_buffer }, pc1.size() * sizeof(uint32_t), 
pc1.data(), { (CEIL_DIV(m, pipeline.wg_denoms[0]) * pipeline.wg_denoms[0]) * split_k, n, batch }); + ggml_vk_sync_buffers(subctx); const std::array pc2 = { (uint32_t)(m * n * batch), split_k }; - ggml_vk_dispatch_pipeline(ctx, vk_pipeline_matmul_split_k_reduce, { split_k_buffer, d }, pc2.size() * sizeof(uint32_t), pc2.data(), { m * n * batch, 1, 1 }); + ggml_vk_dispatch_pipeline(ctx, subctx, ctx->pipeline_matmul_split_k_reduce, { split_k_buffer, d }, pc2.size() * sizeof(uint32_t), pc2.data(), { m * n * batch, 1, 1 }); } static bool ggml_vk_dim01_contiguous(const ggml_tensor * tensor) { @@ -1932,32 +2122,32 @@ static bool ggml_vk_dim01_contiguous(const ggml_tensor * tensor) { tensor->nb[3] == tensor->nb[2]*tensor->ne[2]; } -static vk_pipeline * ggml_vk_get_cpy_pipeline(ggml_type from, ggml_type to) { +static vk_pipeline * ggml_vk_get_cpy_pipeline(ggml_backend_vk_context * ctx, ggml_type from, ggml_type to) { if (from == GGML_TYPE_F32 && to == GGML_TYPE_F32) { - return &vk_pipeline_cpy_f32_f32; + return &ctx->pipeline_cpy_f32_f32; } if (from == GGML_TYPE_F32 && to == GGML_TYPE_F16) { - return &vk_pipeline_cpy_f32_f16; + return &ctx->pipeline_cpy_f32_f16; } if (from == GGML_TYPE_F16 && to == GGML_TYPE_F16) { - return &vk_pipeline_cpy_f16_f16; + return &ctx->pipeline_cpy_f16_f16; } std::cerr << "Missing CPY op for types: " << ggml_type_name(from) << " " << ggml_type_name(to) << std::endl; GGML_ASSERT(false); } -static void ggml_vk_cpy_to_contiguous(vk_context * ctx, vk_pipeline * pipeline, const ggml_tensor * tensor, vk_subbuffer&& in, vk_subbuffer&& out, ggml_type buffer_type, bool aligned=true) { +static void ggml_vk_cpy_to_contiguous(ggml_backend_vk_context * ctx, vk_context * subctx, vk_pipeline * pipeline, const ggml_tensor * tensor, vk_subbuffer&& in, vk_subbuffer&& out, ggml_type buffer_type, bool aligned=true) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_cpy_to_contiguous((" << tensor << ", type=" << tensor->type << ", backend=" << tensor->backend << ", 
ne0=" << tensor->ne[0] << ", ne1=" << tensor->ne[1] << ", ne2=" << tensor->ne[2] << ", ne3=" << tensor->ne[3] << ", nb0=" << tensor->nb[0] << ", nb1=" << tensor->nb[1] << ", nb2=" << tensor->nb[2] << ", nb3=" << tensor->nb[3] << "), "; - std::cerr << "buffer in size=" << in.buffer.size << ", buffer out size=" << out.buffer.size << ")" << std::endl; + std::cerr << "buffer in size=" << in.buffer->size << ", buffer out size=" << out.buffer->size << ")" << std::endl; #endif const int tensor_type_size = ggml_type_size(tensor->type); const int dst_type_size = ggml_type_size(buffer_type); const uint32_t ne = tensor->ne[0] * tensor->ne[1] * tensor->ne[2]; - const uint32_t nb2 = aligned ? ggml_vk_align_size(dst_type_size * tensor->ne[0] * tensor->ne[1], vk_device.properties.limits.minStorageBufferOffsetAlignment) / dst_type_size : tensor->ne[0] * tensor->ne[1]; + const uint32_t nb2 = aligned ? ggml_vk_align_size(dst_type_size * tensor->ne[0] * tensor->ne[1], ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment) / dst_type_size : tensor->ne[0] * tensor->ne[1]; const vk_op_cpy_push_constants pc = { (uint32_t)ne, @@ -1965,11 +2155,11 @@ static void ggml_vk_cpy_to_contiguous(vk_context * ctx, vk_pipeline * pipeline, (uint32_t)tensor->ne[0], (uint32_t)tensor->ne[1], 1 , (uint32_t)tensor->ne[0] , nb2, 0, }; - ggml_vk_sync_buffers(ctx); - ggml_vk_dispatch_pipeline(ctx, *pipeline, { in, out }, sizeof(vk_op_cpy_push_constants), &pc, { ne, 1, 1 }); + ggml_vk_sync_buffers(subctx); + ggml_vk_dispatch_pipeline(ctx, subctx, *pipeline, { in, out }, sizeof(vk_op_cpy_push_constants), &pc, { ne, 1, 1 }); } -static void ggml_vk_mul_mat_q_f16(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_mul_mat_q_f16((" << src0 << ", name=" << 
src0->name << ", type=" << src0->type << ", backend=" << src0->backend << ", ne0=" << src0->ne[0] << ", ne1=" << src0->ne[1] << ", ne2=" << src0->ne[2] << ", ne3=" << src0->ne[3] << ", nb0=" << src0->nb[0] << ", nb1=" << src0->nb[1] << ", nb2=" << src0->nb[2] << ", nb3=" << src0->nb[3]; std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", backend=" << src1->backend << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3]; @@ -1998,17 +2188,17 @@ static void ggml_vk_mul_mat_q_f16(vk_context * ctx, const ggml_tensor * src0, co ggml_tensor_extra_gpu * extra_src0 = (ggml_tensor_extra_gpu *) src0->extra; ggml_tensor_extra_gpu * extra_src1 = (ggml_tensor_extra_gpu *) src1->extra; - vk_buffer * d_Qx = nullptr; + vk_buffer d_Qx; size_t qx_buf_offset = 0; - vk_buffer * d_Qy = nullptr; + vk_buffer d_Qy; size_t qy_buf_offset = 0; bool src0_uma = false; bool src1_uma = false; - if (vk_device.uma) { - ggml_vk_host_get(src0->data, d_Qx, qx_buf_offset); - ggml_vk_host_get(src1->data, d_Qy, qy_buf_offset); + if (ctx->device.lock()->uma) { + ggml_vk_host_get(ctx, src0->data, d_Qx, qx_buf_offset); + ggml_vk_host_get(ctx, src1->data, d_Qy, qy_buf_offset); src0_uma = d_Qx != nullptr; src1_uma = d_Qy != nullptr; } @@ -2031,12 +2221,12 @@ static void ggml_vk_mul_mat_q_f16(vk_context * ctx, const ggml_tensor * src0, co const int y_ne = ne11 * ne10; const int d_ne = ne11 * ne01; - const uint32_t kpad = ggml_vk_align_size(ne10, ggml_vk_guess_matmul_pipeline_align(ne01, ne11)); + const uint32_t kpad = ggml_vk_align_size(ne10, ggml_vk_guess_matmul_pipeline_align(ctx, ne01, ne11)); const bool aligned = ne10 == kpad; const uint32_t split_k = ggml_vk_guess_split_k(ne01, ne11, ne10); - vk_pipeline * pipeline = ggml_vk_guess_matmul_pipeline(true, !f16_f32_kernel, ne01, ne11, aligned); + vk_pipeline * 
pipeline = ggml_vk_guess_matmul_pipeline(ctx, true, !f16_f32_kernel, ne01, ne11, aligned); const uint64_t qx_sz = ggml_type_size(src0->type) * x_ne / ggml_blck_size(src0->type); const uint64_t qy_sz = ggml_type_size(src1->type) * y_ne / ggml_blck_size(src1->type); @@ -2044,30 +2234,30 @@ static void ggml_vk_mul_mat_q_f16(vk_context * ctx, const ggml_tensor * src0, co const uint64_t y_sz = f16_f32_kernel ? sizeof(float) * y_ne : sizeof(ggml_fp16_t) * y_ne; const uint64_t d_sz = sizeof(float) * d_ne; - vk_buffer* d_D = &extra->buffer_gpu; + vk_buffer d_D = extra->buffer_gpu.lock(); const uint64_t d_buf_offset = extra->offset; GGML_ASSERT(d_D != nullptr); GGML_ASSERT(d_D->size >= d_buf_offset + d_sz * ne02 * ne03); - vk_buffer* d_X; + vk_buffer d_X; uint64_t x_buf_offset = 0; - vk_buffer* d_Y; + vk_buffer d_Y; uint64_t y_buf_offset = 0; if (load_x) { - d_Qx = &vk_prealloc_qx; + d_Qx = ctx->prealloc_qx; } else if (!src0_uma) { - d_Qx = &extra_src0->buffer_gpu; + d_Qx = extra_src0->buffer_gpu.lock(); qx_buf_offset = extra_src0->offset; GGML_ASSERT(d_Qx != nullptr); } if (load_y) { - d_Qy = &vk_prealloc_qy; + d_Qy = ctx->prealloc_qy; } else if (!src1_uma) { - d_Qy = &extra_src1->buffer_gpu; + d_Qy = extra_src1->buffer_gpu.lock(); qy_buf_offset = extra_src1->offset; GGML_ASSERT(d_Qy != nullptr); } if (qx_needs_dequant) { - d_X = &vk_prealloc_x; + d_X = ctx->prealloc_x; GGML_ASSERT(d_X->size >= x_sz * ne02 * ne03); } else { d_X = d_Qx; @@ -2075,7 +2265,7 @@ static void ggml_vk_mul_mat_q_f16(vk_context * ctx, const ggml_tensor * src0, co GGML_ASSERT(qx_sz == x_sz); // NOLINT } if (qy_needs_dequant) { - d_Y = &vk_prealloc_y; + d_Y = ctx->prealloc_y; GGML_ASSERT(d_Y->size >= y_sz * ne02 * ne03); } else { d_Y = d_Qy; @@ -2087,49 +2277,49 @@ static void ggml_vk_mul_mat_q_f16(vk_context * ctx, const ggml_tensor * src0, co vk_pipeline * to_fp16_vk_1 = nullptr; if (x_non_contig) { - to_fp16_vk_0 = ggml_vk_get_cpy_pipeline(src0->type, GGML_TYPE_F16); + to_fp16_vk_0 = 
ggml_vk_get_cpy_pipeline(ctx, src0->type, GGML_TYPE_F16); } else { - to_fp16_vk_0 = ggml_vk_get_to_fp16(src0->type); + to_fp16_vk_0 = ggml_vk_get_to_fp16(ctx, src0->type); } if (y_non_contig) { - to_fp16_vk_1 = ggml_vk_get_cpy_pipeline(src1->type, GGML_TYPE_F16); + to_fp16_vk_1 = ggml_vk_get_cpy_pipeline(ctx, src1->type, GGML_TYPE_F16); } else { - to_fp16_vk_1 = ggml_vk_get_to_fp16(src1->type); + to_fp16_vk_1 = ggml_vk_get_to_fp16(ctx, src1->type); } GGML_ASSERT(!qx_needs_dequant || to_fp16_vk_0 != nullptr); // NOLINT GGML_ASSERT(!qy_needs_dequant || to_fp16_vk_1 != nullptr); // NOLINT // Allocate descriptor sets - ggml_vk_pipeline_allocate_descriptor_sets(*pipeline, ne12 * ne13); + ggml_pipeline_allocate_descriptor_sets(ctx, *pipeline, ne12 * ne13); if (qx_needs_dequant) { - ggml_vk_pipeline_allocate_descriptor_sets(*to_fp16_vk_0, x_non_contig ? 1 : ne12 * ne13); + ggml_pipeline_allocate_descriptor_sets(ctx, *to_fp16_vk_0, x_non_contig ? 1 : ne12 * ne13); } if (qy_needs_dequant) { - ggml_vk_pipeline_allocate_descriptor_sets(*to_fp16_vk_1, y_non_contig ? 1 : ne12 * ne13); + ggml_pipeline_allocate_descriptor_sets(ctx, *to_fp16_vk_1, y_non_contig ? 
1 : ne12 * ne13); } if (split_k > 1) { - ggml_vk_pipeline_allocate_descriptor_sets(vk_pipeline_matmul_split_k_reduce, ne12 * ne13); + ggml_pipeline_allocate_descriptor_sets(ctx, ctx->pipeline_matmul_split_k_reduce, ne12 * ne13); } if (x_non_contig) { - ggml_vk_cpy_to_contiguous(ctx, to_fp16_vk_0, src0, { *d_Qx, qx_buf_offset, VK_WHOLE_SIZE }, { *d_X, 0, VK_WHOLE_SIZE }, dst->type, false); + ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_0, src0, { d_Qx, qx_buf_offset, VK_WHOLE_SIZE }, { d_X, 0, VK_WHOLE_SIZE }, dst->type, false); } else if (load_x || qx_needs_dequant) { if (load_x) { // copy data to device - ggml_vk_h2d_tensor_2d(ctx, d_Qx, 0, src0, 0, 0, ggml_nrows(src0)); - vk_staging_offset = qx_sz * ne02 * ne03; + ggml_vk_h2d_tensor_2d(ctx, subctx, d_Qx, 0, src0, 0, 0, ggml_nrows(src0)); + ctx->staging_offset = qx_sz * ne02 * ne03; } if (qx_needs_dequant) { const std::vector pc = { (int)ne01, (int)ne10, (int)ne10, (int)ne10 }; - ggml_vk_sync_buffers(ctx); - ggml_vk_dispatch_pipeline(ctx, *to_fp16_vk_0, { { *d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, { *d_X, 0, x_sz * ne02 * ne03 } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)(x_ne * ne02 * ne03), 1, 1}); + ggml_vk_sync_buffers(subctx); + ggml_vk_dispatch_pipeline(ctx, subctx, *to_fp16_vk_0, { { d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, { d_X, 0, x_sz * ne02 * ne03 } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)(x_ne * ne02 * ne03), 1, 1}); } } if (y_non_contig) { - ggml_vk_cpy_to_contiguous(ctx, to_fp16_vk_1, src1, { *d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { *d_Y, 0, VK_WHOLE_SIZE }, dst->type); + ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE }, dst->type); } else if (load_y) { - ggml_vk_h2d_tensor_2d(ctx, d_Qy, 0, src1, 0, 0, ggml_nrows(src1)); + ggml_vk_h2d_tensor_2d(ctx, subctx, d_Qy, 0, src1, 0, 0, ggml_nrows(src1)); } uint32_t stride_batch_x = ne00*ne01; @@ -2144,16 +2334,16 @@ static void 
ggml_vk_mul_mat_q_f16(vk_context * ctx, const ggml_tensor * src0, co } // compute - ggml_vk_matmul(ctx, *pipeline, { *d_X, x_buf_offset, x_sz * ne02 * ne03 }, { *d_Y, y_buf_offset, y_sz * ne12 * ne13 }, { *d_D, d_buf_offset, d_sz * ne12 * ne13 }, { vk_prealloc_split_k, 0, d_sz * ne12 * ne13 * split_k }, ne01, ne11, ne10, ne10, ne10, ne01, split_k, ne12*ne13, ne02, ne12, r2, r3, stride_batch_x, stride_batch_y, ne20*ne21); // NOLINT + ggml_vk_matmul(ctx, subctx, *pipeline, { d_X, x_buf_offset, x_sz * ne02 * ne03 }, { d_Y, y_buf_offset, y_sz * ne12 * ne13 }, { d_D, d_buf_offset, d_sz * ne12 * ne13 }, { ctx->prealloc_split_k, 0, d_sz * ne12 * ne13 * split_k }, ne01, ne11, ne10, ne10, ne10, ne01, split_k, ne12*ne13, ne02, ne12, r2, r3, stride_batch_x, stride_batch_y, ne20*ne21); // NOLINT if (dst->backend == GGML_BACKEND_CPU) { // copy dst to host float * d = (float *) ((char *) dst->data); - ggml_vk_buffer_read_async(ctx, d_D, 0, d, sizeof(float) * d_ne * ne12 * ne13); + ggml_vk_buffer_read_async(ctx, subctx, d_D, 0, d, sizeof(float) * d_ne * ne12 * ne13); } } -static void ggml_vk_mul_mat_vec_q_f16(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_mul_mat_vec_q_f16((" << src0 << ", name=" << src0->name << ", type=" << src0->type << ", backend=" << src0->backend << ", ne0=" << src0->ne[0] << ", ne1=" << src0->ne[1] << ", ne2=" << src0->ne[2] << ", ne3=" << src0->ne[3] << ", nb0=" << src0->nb[0] << ", nb1=" << src0->nb[1] << ", nb2=" << src0->nb[2] << ", nb3=" << src0->nb[3]; std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", backend=" << src1->backend << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << 
", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3]; @@ -2184,17 +2374,17 @@ static void ggml_vk_mul_mat_vec_q_f16(vk_context * ctx, const ggml_tensor * src0 ggml_tensor_extra_gpu * extra_src0 = (ggml_tensor_extra_gpu *) src0->extra; ggml_tensor_extra_gpu * extra_src1 = (ggml_tensor_extra_gpu *) src1->extra; - vk_buffer * d_Qx = nullptr; + vk_buffer d_Qx; size_t qx_buf_offset = 0; - vk_buffer * d_Qy = nullptr; + vk_buffer d_Qy; size_t qy_buf_offset = 0; bool src0_uma = false; bool src1_uma = false; - if (vk_device.uma) { - ggml_vk_host_get(src0->data, d_Qx, qx_buf_offset); - ggml_vk_host_get(src1->data, d_Qy, qy_buf_offset); + if (ctx->device.lock()->uma) { + ggml_vk_host_get(ctx, src0->data, d_Qx, qx_buf_offset); + ggml_vk_host_get(ctx, src1->data, d_Qy, qy_buf_offset); src0_uma = d_Qx != nullptr; src1_uma = d_Qy != nullptr; } @@ -2214,42 +2404,42 @@ static void ggml_vk_mul_mat_vec_q_f16(vk_context * ctx, const ggml_tensor * src0 const uint64_t y_ne = ne11 * ne10; const uint64_t d_ne = ne11 * ne01; - const uint64_t qx_sz = ggml_vk_align_size(ggml_type_size(src0->type) * x_ne / ggml_blck_size(src0->type), vk_device.properties.limits.minStorageBufferOffsetAlignment); + const uint64_t qx_sz = ggml_vk_align_size(ggml_type_size(src0->type) * x_ne / ggml_blck_size(src0->type), ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment); const uint64_t qy_sz = ggml_type_size(src1->type) * y_ne / ggml_blck_size(src1->type); - const uint64_t x_sz = x_non_contig ? ggml_vk_align_size(ggml_type_size(src0->type) * x_ne, vk_device.properties.limits.minStorageBufferOffsetAlignment) : qx_sz; + const uint64_t x_sz = x_non_contig ? ggml_vk_align_size(ggml_type_size(src0->type) * x_ne, ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment) : qx_sz; const uint64_t y_sz = f16_f32_kernel ? 
sizeof(float) * y_ne : sizeof(ggml_fp16_t) * y_ne; const uint64_t d_sz = sizeof(float) * d_ne; - vk_buffer* d_D = &extra->buffer_gpu; + vk_buffer d_D = extra->buffer_gpu.lock(); const uint64_t d_buf_offset = extra->offset; GGML_ASSERT(d_D != nullptr); - vk_buffer* d_X; + vk_buffer d_X; uint64_t x_buf_offset = 0; - vk_buffer* d_Y; + vk_buffer d_Y; uint64_t y_buf_offset = 0; if (load_x) { - d_Qx = &vk_prealloc_qx; + d_Qx = ctx->prealloc_qx; } else if(!src1_uma) { - d_Qx = &extra_src0->buffer_gpu; + d_Qx = extra_src0->buffer_gpu.lock(); qx_buf_offset = extra_src0->offset; GGML_ASSERT(d_Qx != nullptr); } if (load_y) { - d_Qy = &vk_prealloc_qy; + d_Qy = ctx->prealloc_qy; } else if(!src1_uma) { - d_Qy = &extra_src1->buffer_gpu; + d_Qy = extra_src1->buffer_gpu.lock(); qy_buf_offset = extra_src1->offset; GGML_ASSERT(d_Qy != nullptr); } if (qx_needs_dequant) { - d_X = &vk_prealloc_x; + d_X = ctx->prealloc_x; } else { d_X = d_Qx; x_buf_offset = qx_buf_offset; GGML_ASSERT(qx_sz == x_sz); } if (qy_needs_dequant) { - d_Y = &vk_prealloc_y; + d_Y = ctx->prealloc_y; } else { d_Y = d_Qy; y_buf_offset = qy_buf_offset; @@ -2259,39 +2449,39 @@ static void ggml_vk_mul_mat_vec_q_f16(vk_context * ctx, const ggml_tensor * src0 vk_pipeline * to_fp16_vk_0 = nullptr; vk_pipeline* to_fp16_vk_1 = nullptr; if (x_non_contig) { - to_fp16_vk_0 = ggml_vk_get_cpy_pipeline(src0->type, src0->type); + to_fp16_vk_0 = ggml_vk_get_cpy_pipeline(ctx, src0->type, src0->type); } if (y_non_contig) { - to_fp16_vk_1 = ggml_vk_get_cpy_pipeline(src1->type, src1->type); + to_fp16_vk_1 = ggml_vk_get_cpy_pipeline(ctx, src1->type, src1->type); } else { - to_fp16_vk_1 = ggml_vk_get_to_fp16(src1->type); + to_fp16_vk_1 = ggml_vk_get_to_fp16(ctx, src1->type); } - vk_pipeline* dmmv = ggml_vk_get_dequantize_mul_mat_vec(src0->type); + vk_pipeline* dmmv = ggml_vk_get_dequantize_mul_mat_vec(ctx, src0->type); GGML_ASSERT(!qx_needs_dequant || to_fp16_vk_0 != nullptr); // NOLINT GGML_ASSERT(!qy_needs_dequant || to_fp16_vk_1 != 
nullptr); // NOLINT GGML_ASSERT(dmmv != nullptr); // Allocate descriptor sets if (qx_needs_dequant) { - ggml_vk_pipeline_allocate_descriptor_sets(*to_fp16_vk_0, 1); + ggml_pipeline_allocate_descriptor_sets(ctx, *to_fp16_vk_0, 1); } if (qy_needs_dequant) { - ggml_vk_pipeline_allocate_descriptor_sets(*to_fp16_vk_1, y_non_contig ? 1 : ne12 * ne13); + ggml_pipeline_allocate_descriptor_sets(ctx, *to_fp16_vk_1, y_non_contig ? 1 : ne12 * ne13); } - ggml_vk_pipeline_allocate_descriptor_sets(*dmmv, ne12 * ne13); + ggml_pipeline_allocate_descriptor_sets(ctx, *dmmv, ne12 * ne13); if (x_non_contig) { - GGML_ASSERT(x_sz == ggml_vk_align_size(ggml_type_size(src0->type) * x_ne, vk_device.properties.limits.minStorageBufferOffsetAlignment)); - ggml_vk_cpy_to_contiguous(ctx, to_fp16_vk_0, src0, { *d_Qx, qx_buf_offset, VK_WHOLE_SIZE }, { *d_X, 0, VK_WHOLE_SIZE }, src0->type); + GGML_ASSERT(x_sz == ggml_vk_align_size(ggml_type_size(src0->type) * x_ne, ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment)); + ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_0, src0, { d_Qx, qx_buf_offset, VK_WHOLE_SIZE }, { d_X, 0, VK_WHOLE_SIZE }, src0->type); } else if (load_x) { // copy data to device - ggml_vk_h2d_tensor_2d(ctx, d_Qx, 0, src0, 0, 0, ggml_nrows(src0)); + ggml_vk_h2d_tensor_2d(ctx, subctx, d_Qx, 0, src0, 0, 0, ggml_nrows(src0)); } if (y_non_contig) { GGML_ASSERT(y_sz == ggml_type_size(src1->type) * y_ne); - ggml_vk_cpy_to_contiguous(ctx, to_fp16_vk_1, src1, { *d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { *d_Y, 0, VK_WHOLE_SIZE }, src1->type); + ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE }, src1->type); } else if (load_y) { - ggml_vk_h2d_tensor_2d(ctx, d_Qy, 0, src1, 0, 0, ggml_nrows(src1)); + ggml_vk_h2d_tensor_2d(ctx, subctx, d_Qy, 0, src1, 0, 0, ggml_nrows(src1)); } for (uint64_t i13 = 0; i13 < ne13; i13++) { @@ -2306,34 +2496,34 @@ static void ggml_vk_mul_mat_vec_q_f16(vk_context * ctx, 
const ggml_tensor * src0 const uint64_t y_offset = y_buf_offset + y_sz * it_idx1; const uint64_t d_offset = d_buf_offset + d_sz * it_idx1; - const uint64_t y_buffer_offset = (y_offset / vk_device.properties.limits.minStorageBufferOffsetAlignment) * vk_device.properties.limits.minStorageBufferOffsetAlignment; + const uint64_t y_buffer_offset = (y_offset / ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment) * ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment; const uint64_t y_shader_offset = y_offset - y_buffer_offset; - const uint64_t d_buffer_offset = (d_offset / vk_device.properties.limits.minStorageBufferOffsetAlignment) * vk_device.properties.limits.minStorageBufferOffsetAlignment; + const uint64_t d_buffer_offset = (d_offset / ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment) * ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment; const uint64_t d_shader_offset = d_offset - d_buffer_offset; if (!y_non_contig && qy_needs_dequant) { const std::vector pc = { (int)ne11, (int)ne10, (int)ne10, (int)ne10 }; - ggml_vk_sync_buffers(ctx); - ggml_vk_dispatch_pipeline(ctx, *to_fp16_vk_1, { { *d_Qy, qy_offset, qy_sz }, { *d_Y, y_offset, y_sz } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)y_ne, 1, 1}); + ggml_vk_sync_buffers(subctx); + ggml_vk_dispatch_pipeline(ctx, subctx, *to_fp16_vk_1, { { d_Qy, qy_offset, qy_sz }, { d_Y, y_offset, y_sz } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)y_ne, 1, 1}); } // compute const std::array pc = { (int)ne00, (int)(y_shader_offset / ggml_type_size(src1->type)), (int)(d_shader_offset / ggml_type_size(dst->type))}; - ggml_vk_sync_buffers(ctx); - ggml_vk_dispatch_pipeline(ctx, *dmmv, { { *d_X, x_offset, x_sz }, { *d_Y, y_buffer_offset, y_sz + y_shader_offset }, { *d_D, d_buffer_offset, d_sz + d_shader_offset } }, 3 * sizeof(int), &pc, { (uint32_t)ne01, 1, 1}); + ggml_vk_sync_buffers(subctx); + ggml_vk_dispatch_pipeline(ctx, subctx, *dmmv, { { d_X, 
x_offset, x_sz }, { d_Y, y_buffer_offset, y_sz + y_shader_offset }, { d_D, d_buffer_offset, d_sz + d_shader_offset } }, 3 * sizeof(int), &pc, { (uint32_t)ne01, 1, 1}); if (dst->backend == GGML_BACKEND_CPU) { // copy dst to host float * d = (float *) ((char *) dst->data + i12*nb2 + i13*nb3); - ggml_vk_sync_buffers(ctx); - ggml_vk_buffer_read_async(ctx, d_D, d_offset, d, sizeof(float) * d_ne); + ggml_vk_sync_buffers(subctx); + ggml_vk_buffer_read_async(ctx, subctx, d_D, d_offset, d, sizeof(float) * d_ne); } } } } -static void ggml_vk_mul_mat_vec_p021_f16_f32(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +static void ggml_vk_mul_mat_vec_p021_f16_f32(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_mul_mat_p021_f16_f32((" << src0 << ", name=" << src0->name << ", type=" << src0->type << ", backend=" << src0->backend << ", ne0=" << src0->ne[0] << ", ne1=" << src0->ne[1] << ", ne2=" << src0->ne[2] << ", ne3=" << src0->ne[3] << ", nb0=" << src0->nb[0] << ", nb1=" << src0->nb[1] << ", nb2=" << src0->nb[2] << ", nb3=" << src0->nb[3]; std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", backend=" << src1->backend << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3]; @@ -2362,13 +2552,13 @@ static void ggml_vk_mul_mat_vec_p021_f16_f32(vk_context * ctx, const ggml_tensor ggml_tensor_extra_gpu * extra_src0 = (ggml_tensor_extra_gpu *) src0->extra; ggml_tensor_extra_gpu * extra_src1 = (ggml_tensor_extra_gpu *) src1->extra; - vk_buffer * d_Qy = nullptr; + vk_buffer d_Qy; size_t qy_buf_offset = 0; bool src1_uma = false; - if (vk_device.uma) { - ggml_vk_host_get(src1->data, d_Qy, qy_buf_offset); + if 
(ctx->device.lock()->uma) { + ggml_vk_host_get(ctx, src1->data, d_Qy, qy_buf_offset); src1_uma = d_Qy != nullptr; } @@ -2378,51 +2568,51 @@ static void ggml_vk_mul_mat_vec_p021_f16_f32(vk_context * ctx, const ggml_tensor const uint64_t y_ne = ne10 * ne11 * ne12; const uint64_t d_ne = ne01 * ne11 * ne12; - const uint64_t qx_sz = ggml_vk_align_size(ggml_type_size(src0->type) * x_ne / ggml_blck_size(src0->type), vk_device.properties.limits.minStorageBufferOffsetAlignment); + const uint64_t qx_sz = ggml_vk_align_size(ggml_type_size(src0->type) * x_ne / ggml_blck_size(src0->type), ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment); const uint64_t qy_sz = ggml_type_size(src1->type) * y_ne / ggml_blck_size(src1->type); const uint64_t d_sz = sizeof(float) * d_ne; - vk_buffer* d_D = &extra->buffer_gpu; + vk_buffer d_D = extra->buffer_gpu.lock(); const uint64_t d_buf_offset = extra->offset; GGML_ASSERT(d_D != nullptr); - vk_buffer* d_Qx = &extra_src0->buffer_gpu; + vk_buffer d_Qx = extra_src0->buffer_gpu.lock(); const uint64_t qx_buf_offset = extra_src0->offset; GGML_ASSERT(d_Qx != nullptr); if (load_y) { - d_Qy = &vk_prealloc_qy; + d_Qy = ctx->prealloc_qy; } else if (!src1_uma) { - d_Qy = &extra_src1->buffer_gpu; + d_Qy = extra_src1->buffer_gpu.lock(); qy_buf_offset = extra_src1->offset; GGML_ASSERT(d_Qx != nullptr); } // Allocate descriptor sets - ggml_vk_pipeline_allocate_descriptor_sets(vk_pipeline_mul_mat_vec_p021_f16_f32, 1); + ggml_pipeline_allocate_descriptor_sets(ctx, ctx->pipeline_mul_mat_vec_p021_f16_f32, 1); - const uint64_t qy_buffer_offset = (qy_buf_offset / vk_device.properties.limits.minStorageBufferOffsetAlignment) * vk_device.properties.limits.minStorageBufferOffsetAlignment; + const uint64_t qy_buffer_offset = (qy_buf_offset / ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment) * ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment; const uint64_t qy_shader_offset = qy_buf_offset - qy_buffer_offset; - 
const uint64_t d_buffer_offset = (d_buf_offset / vk_device.properties.limits.minStorageBufferOffsetAlignment) * vk_device.properties.limits.minStorageBufferOffsetAlignment; + const uint64_t d_buffer_offset = (d_buf_offset / ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment) * ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment; const uint64_t d_shader_offset = d_buf_offset - d_buffer_offset; if (load_y) { - ggml_vk_h2d_tensor_2d(ctx, d_Qy, qy_buf_offset, src1, 0, 0, ggml_nrows(src1)); + ggml_vk_h2d_tensor_2d(ctx, subctx, d_Qy, qy_buf_offset, src1, 0, 0, ggml_nrows(src1)); } // compute const std::array pc = { (uint32_t)ne00, (uint32_t)ne01, (uint32_t)ne02, (uint32_t)ne12, (uint32_t)(qy_shader_offset / ggml_type_size(src1->type)), (uint32_t)(d_shader_offset / ggml_type_size(dst->type)) }; - ggml_vk_sync_buffers(ctx); - ggml_vk_dispatch_pipeline(ctx, vk_pipeline_mul_mat_vec_p021_f16_f32, { { *d_Qx, qx_buf_offset, qx_sz }, { *d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, { *d_D, d_buffer_offset, d_sz + d_shader_offset } }, 6 * sizeof(uint32_t), &pc, { 1, (uint32_t)ne01, (uint32_t)ne12 }); + ggml_vk_sync_buffers(subctx); + ggml_vk_dispatch_pipeline(ctx, subctx, ctx->pipeline_mul_mat_vec_p021_f16_f32, { { d_Qx, qx_buf_offset, qx_sz }, { d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, { d_D, d_buffer_offset, d_sz + d_shader_offset } }, 6 * sizeof(uint32_t), &pc, { 1, (uint32_t)ne01, (uint32_t)ne12 }); if (dst->backend == GGML_BACKEND_CPU) { // copy dst to host float * d = (float *) dst->data; - ggml_vk_sync_buffers(ctx); - ggml_vk_buffer_read_async(ctx, d_D, d_buf_offset, d, sizeof(float) * d_ne); + ggml_vk_sync_buffers(subctx); + ggml_vk_buffer_read_async(ctx, subctx, d_D, d_buf_offset, d, sizeof(float) * d_ne); } } -static void ggml_vk_mul_mat_vec_nc_f16_f32(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +static void ggml_vk_mul_mat_vec_nc_f16_f32(ggml_backend_vk_context * ctx, 
vk_context * subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_mul_mat_nc_f16_f32((" << src0 << ", name=" << src0->name << ", type=" << src0->type << ", backend=" << src0->backend << ", ne0=" << src0->ne[0] << ", ne1=" << src0->ne[1] << ", ne2=" << src0->ne[2] << ", ne3=" << src0->ne[3] << ", nb0=" << src0->nb[0] << ", nb1=" << src0->nb[1] << ", nb2=" << src0->nb[2] << ", nb3=" << src0->nb[3]; std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", backend=" << src1->backend << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3]; @@ -2454,13 +2644,13 @@ static void ggml_vk_mul_mat_vec_nc_f16_f32(vk_context * ctx, const ggml_tensor * ggml_tensor_extra_gpu * extra_src0 = (ggml_tensor_extra_gpu *) src0->extra; ggml_tensor_extra_gpu * extra_src1 = (ggml_tensor_extra_gpu *) src1->extra; - vk_buffer * d_Qy = nullptr; + vk_buffer d_Qy = nullptr; size_t qy_buf_offset = 0; bool src1_uma = false; - if (vk_device.uma) { - ggml_vk_host_get(src1->data, d_Qy, qy_buf_offset); + if (ctx->device.lock()->uma) { + ggml_vk_host_get(ctx, src1->data, d_Qy, qy_buf_offset); src1_uma = d_Qy != nullptr; } @@ -2475,43 +2665,43 @@ static void ggml_vk_mul_mat_vec_nc_f16_f32(vk_context * ctx, const ggml_tensor * const uint64_t qy_sz = ggml_nbytes(src1); const uint64_t d_sz = sizeof(float) * d_ne; - vk_buffer* d_D = &extra->buffer_gpu; + vk_buffer d_D = extra->buffer_gpu.lock(); const uint64_t d_buf_offset = extra->offset; GGML_ASSERT(d_D != nullptr); - vk_buffer* d_Qx = &extra_src0->buffer_gpu; + vk_buffer d_Qx = extra_src0->buffer_gpu.lock(); const uint64_t qx_buf_offset = extra_src0->offset; GGML_ASSERT(d_Qx != nullptr); if (load_y) { - d_Qy = &vk_prealloc_qy; + d_Qy = ctx->prealloc_qy; } else { - d_Qy = 
&extra_src1->buffer_gpu; + d_Qy = extra_src1->buffer_gpu.lock(); qy_buf_offset = extra_src1->offset; GGML_ASSERT(d_Qx != nullptr); } // Allocate descriptor sets - ggml_vk_pipeline_allocate_descriptor_sets(vk_pipeline_mul_mat_vec_nc_f16_f32, 1); + ggml_pipeline_allocate_descriptor_sets(ctx, ctx->pipeline_mul_mat_vec_nc_f16_f32, 1); - const uint64_t qy_buffer_offset = (qy_buf_offset / vk_device.properties.limits.minStorageBufferOffsetAlignment) * vk_device.properties.limits.minStorageBufferOffsetAlignment; + const uint64_t qy_buffer_offset = (qy_buf_offset / ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment) * ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment; const uint64_t qy_shader_offset = qy_buf_offset - qy_buffer_offset; - const uint64_t d_buffer_offset = (d_buf_offset / vk_device.properties.limits.minStorageBufferOffsetAlignment) * vk_device.properties.limits.minStorageBufferOffsetAlignment; + const uint64_t d_buffer_offset = (d_buf_offset / ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment) * ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment; const uint64_t d_shader_offset = d_buf_offset - d_buffer_offset; if (load_y) { - ggml_vk_h2d_tensor_2d(ctx, d_Qy, qy_buf_offset, src1, 0, 0, ggml_nrows(src1)); + ggml_vk_h2d_tensor_2d(ctx, subctx, d_Qy, qy_buf_offset, src1, 0, 0, ggml_nrows(src1)); } // compute const std::array pc = { (uint32_t)ne00, (uint32_t)ne01, row_stride_x, channel_stride_x, (uint32_t)(ne12 / ne02), (uint32_t)(qy_shader_offset / ggml_type_size(src1->type)), (uint32_t)(d_shader_offset / ggml_type_size(dst->type)) }; - ggml_vk_sync_buffers(ctx); - ggml_vk_dispatch_pipeline(ctx, vk_pipeline_mul_mat_vec_nc_f16_f32, { { *d_Qx, qx_buf_offset, qx_sz }, { *d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, { *d_D, d_buffer_offset, d_sz + d_shader_offset } }, 7 * sizeof(uint32_t), &pc, { 1, (uint32_t)ne01, (uint32_t)ne12 }); + ggml_vk_sync_buffers(subctx); + 
ggml_vk_dispatch_pipeline(ctx, subctx, ctx->pipeline_mul_mat_vec_nc_f16_f32, { { d_Qx, qx_buf_offset, qx_sz }, { d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, { d_D, d_buffer_offset, d_sz + d_shader_offset } }, 7 * sizeof(uint32_t), &pc, { 1, (uint32_t)ne01, (uint32_t)ne12 }); if (dst->backend == GGML_BACKEND_CPU) { // copy dst to host float * d = (float *) dst->data; - ggml_vk_sync_buffers(ctx); - ggml_vk_buffer_read_async(ctx, d_D, d_buf_offset, d, sizeof(float) * d_ne); + ggml_vk_sync_buffers(subctx); + ggml_vk_buffer_read_async(ctx, subctx, d_D, d_buf_offset, d, sizeof(float) * d_ne); } } @@ -2528,22 +2718,22 @@ static bool ggml_vk_can_mul_mat(const ggml_tensor * src0, const ggml_tensor * sr ((ne0 >= 32 && ne1 >= 32 && ne10 >= 32) || src0->backend == GGML_BACKEND_GPU); } -static void ggml_vk_mul_mat(vk_context * ctx, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { +static void ggml_vk_mul_mat(ggml_backend_vk_context * ctx, vk_context * subctx, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_mul_mat(" << src0 << ", " << src1 << ", " << dst << ")" << std::endl; #endif if (src0->type == GGML_TYPE_F16 && ggml_is_permuted(src0) && ggml_is_permuted(src1) && src1->ne[1] == 1) { - ggml_vk_mul_mat_vec_p021_f16_f32(ctx, src0, src1, dst); + ggml_vk_mul_mat_vec_p021_f16_f32(ctx, subctx, src0, src1, dst); } else if (src0->type == GGML_TYPE_F16 && !ggml_is_contiguous(src0) && !ggml_is_transposed(src1) && src1->ne[1] == 1) { - ggml_vk_mul_mat_vec_nc_f16_f32(ctx, src0, src1, dst); + ggml_vk_mul_mat_vec_nc_f16_f32(ctx, subctx, src0, src1, dst); } else if (src1->ne[1] == 1 && (src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type))) { - ggml_vk_mul_mat_vec_q_f16(ctx, src0, src1, dst); + ggml_vk_mul_mat_vec_q_f16(ctx, subctx, src0, src1, dst); } else { - ggml_vk_mul_mat_q_f16(ctx, src0, src1, dst); + ggml_vk_mul_mat_q_f16(ctx, 
subctx, src0, src1, dst); } } -static void ggml_vk_op_repeat(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +static void ggml_vk_op_repeat(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { // guaranteed to be an integer due to the check in ggml_can_repeat const uint64_t ne0 = dst->ne[0]; const uint64_t ne1 = dst->ne[1]; @@ -2579,9 +2769,9 @@ static void ggml_vk_op_repeat(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) dst->extra; ggml_tensor_extra_gpu * extra_src0 = (ggml_tensor_extra_gpu *) src0->extra; - const vk_buffer* src_buf = &extra_src0->buffer_gpu; + const vk_buffer src_buf = extra_src0->buffer_gpu.lock(); const uint64_t src_offset = extra_src0->offset; - vk_buffer* dst_buf = &extra->buffer_gpu; + vk_buffer dst_buf = extra->buffer_gpu.lock(); const uint64_t dst_offset = extra->offset; std::vector copies; @@ -2606,78 +2796,79 @@ static void ggml_vk_op_repeat(vk_context * ctx, const ggml_tensor * src0, const } } - ggml_vk_sync_buffers(ctx); - ctx->s->buffer.copyBuffer(src_buf->buffer, dst_buf->buffer, copies); + ggml_vk_sync_buffers(subctx); + subctx->s->buffer.copyBuffer(src_buf->buffer, dst_buf->buffer, copies); - (void) src1; + GGML_UNUSED(ctx); + GGML_UNUSED(src1); } -static vk_pipeline* ggml_vk_op_get_pipeline(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, ggml_op op) { +static vk_pipeline* ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, ggml_op op) { switch (op) { case GGML_OP_ADD: if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { - return &vk_pipeline_add_f32; + return &ctx->pipeline_add_f32; } return nullptr; case GGML_OP_GET_ROWS: GGML_ASSERT(src1->type == GGML_TYPE_I32); if (dst->type == GGML_TYPE_F16) { - return 
&vk_pipeline_get_rows[src0->type]; + return &ctx->pipeline_get_rows[src0->type]; } if (dst->type == GGML_TYPE_F32) { - return &vk_pipeline_get_rows_f32[src0->type]; + return &ctx->pipeline_get_rows_f32[src0->type]; } return nullptr; case GGML_OP_MUL: if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { - return &vk_pipeline_mul_f32; + return &ctx->pipeline_mul_f32; } return nullptr; case GGML_OP_SCALE: if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { - return &vk_pipeline_scale_f32; + return &ctx->pipeline_scale_f32; } return nullptr; case GGML_OP_SQR: if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { - return &vk_pipeline_sqr_f32; + return &ctx->pipeline_sqr_f32; } return nullptr; case GGML_OP_CLAMP: if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { - return &vk_pipeline_clamp_f32; + return &ctx->pipeline_clamp_f32; } return nullptr; case GGML_OP_CPY: case GGML_OP_CONT: case GGML_OP_DUP: - return ggml_vk_get_cpy_pipeline(src0->type, dst->type); + return ggml_vk_get_cpy_pipeline(ctx, src0->type, dst->type); case GGML_OP_NORM: if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { - return &vk_pipeline_norm_f32; + return &ctx->pipeline_norm_f32; } return nullptr; case GGML_OP_RMS_NORM: if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { - return &vk_pipeline_rms_norm_f32; + return &ctx->pipeline_rms_norm_f32; } return nullptr; case GGML_OP_UNARY: switch (ggml_get_unary_op(dst)) { case GGML_UNARY_OP_SILU: if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { - return &vk_pipeline_silu_f32; + return &ctx->pipeline_silu_f32; } break; case GGML_UNARY_OP_GELU: if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { - return &vk_pipeline_gelu_f32; + return &ctx->pipeline_gelu_f32; } break; case GGML_UNARY_OP_RELU: if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { - return &vk_pipeline_relu_f32; + return &ctx->pipeline_relu_f32; 
} break; default: @@ -2686,12 +2877,12 @@ static vk_pipeline* ggml_vk_op_get_pipeline(const ggml_tensor * src0, const ggml return nullptr; case GGML_OP_DIAG_MASK_INF: if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { - return &vk_pipeline_diag_mask_inf_f32; + return &ctx->pipeline_diag_mask_inf_f32; } return nullptr; case GGML_OP_SOFT_MAX: if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { - return &vk_pipeline_soft_max_f32; + return &ctx->pipeline_soft_max_f32; } return nullptr; case GGML_OP_ROPE: @@ -2706,17 +2897,17 @@ static vk_pipeline* ggml_vk_op_get_pipeline(const ggml_tensor * src0, const ggml if (is_neox) { if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { - return &vk_pipeline_rope_neox_f32; + return &ctx->pipeline_rope_neox_f32; } if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F16) { - return &vk_pipeline_rope_neox_f16; + return &ctx->pipeline_rope_neox_f16; } } else { if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { - return &vk_pipeline_rope_f32; + return &ctx->pipeline_rope_f32; } if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F16) { - return &vk_pipeline_rope_f16; + return &ctx->pipeline_rope_f16; } } return nullptr; @@ -2735,13 +2926,8 @@ static ggml_vk_func_t ggml_vk_op_get_func(ggml_op op) { } } -#ifdef GGML_VULKAN_CHECK_RESULTS -static void ggml_vk_print_tensor(const ggml_tensor * tensor, const char * name); -static void ggml_vk_check_results_0(ggml_compute_params * params, ggml_tensor * tensor); -#endif - template -static void ggml_vk_op_f32(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, ggml_op op, const PC&& pc) { +static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, ggml_op op, const PC&& pc) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_op_f32((" << src0 << ", name=" << src0->name << ", type=" << src0->type << ", 
backend=" << src0->backend << ", ne0=" << src0->ne[0] << ", ne1=" << src0->ne[1] << ", ne2=" << src0->ne[2] << ", ne3=" << src0->ne[3] << ", nb0=" << src0->nb[0] << ", nb1=" << src0->nb[1] << ", nb2=" << src0->nb[2] << ", nb3=" << src0->nb[3]; if (src1 != nullptr) { @@ -2768,7 +2954,7 @@ static void ggml_vk_op_f32(vk_context * ctx, const ggml_tensor * src0, const ggm const uint64_t nb2 = dst->nb[2]; const uint64_t nb3 = dst->nb[3]; - vk_pipeline * pipeline = ggml_vk_op_get_pipeline(src0, src1, dst, op); + vk_pipeline * pipeline = ggml_vk_op_get_pipeline(ctx, src0, src1, dst, op); ggml_vk_func_t op_func; if (pipeline == nullptr) { @@ -2782,7 +2968,7 @@ static void ggml_vk_op_f32(vk_context * ctx, const ggml_tensor * src0, const ggm GGML_ASSERT(false); } - op_func(ctx, src0, src1, dst); + op_func(ctx, subctx, src0, src1, dst); return; } @@ -2790,19 +2976,19 @@ static void ggml_vk_op_f32(vk_context * ctx, const ggml_tensor * src0, const ggm ggml_tensor_extra_gpu * extra_src0 = (ggml_tensor_extra_gpu *) src0->extra; ggml_tensor_extra_gpu * extra_src1 = use_src1 ? 
(ggml_tensor_extra_gpu *) src1->extra : nullptr; - vk_buffer * d_X = nullptr; + vk_buffer d_X = nullptr; size_t x_buf_offset = 0; - vk_buffer * d_Y = nullptr; + vk_buffer d_Y = nullptr; size_t y_buf_offset = 0; bool src0_uma = false; bool src1_uma = false; - if (vk_device.uma) { - ggml_vk_host_get(src0->data, d_X, x_buf_offset); + if (ctx->device.lock()->uma) { + ggml_vk_host_get(ctx, src0->data, d_X, x_buf_offset); src0_uma = d_X != nullptr; if (use_src1) { - ggml_vk_host_get(src1->data, d_Y, y_buf_offset); + ggml_vk_host_get(ctx, src1->data, d_Y, y_buf_offset); src1_uma = d_Y != nullptr; } } @@ -2810,30 +2996,31 @@ static void ggml_vk_op_f32(vk_context * ctx, const ggml_tensor * src0, const ggm const bool transfer_src0 = src0->backend != GGML_BACKEND_GPU && !src0_uma; const bool transfer_src1 = use_src1 && src1->backend != GGML_BACKEND_GPU && !src1_uma; - uint64_t x_sz = ggml_vk_align_size(ggml_type_size(src0->type) * ne0, vk_device.properties.limits.minStorageBufferOffsetAlignment); - uint64_t y_sz = use_src1 ? ggml_vk_align_size(ggml_type_size(src1->type) * ne1, vk_device.properties.limits.minStorageBufferOffsetAlignment) : 0; + uint64_t x_sz = ggml_vk_align_size(ggml_type_size(src0->type) * ne0, ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment); + uint64_t y_sz = use_src1 ? 
ggml_vk_align_size(ggml_type_size(src1->type) * ne1, ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment) : 0; uint64_t d_sz = ggml_type_size(dst->type) * ne0; + vk_buffer d_D = extra->buffer_gpu.lock(); + // Workaround for tiny tensor inputs on ROPE - if (use_src1 && src1->backend == GGML_BACKEND_GPU && y_sz > extra_src1->buffer_gpu.size) { + if (use_src1 && src1->backend == GGML_BACKEND_GPU && y_sz > d_D->size) { y_sz = VK_WHOLE_SIZE; } - vk_buffer* d_D = &extra->buffer_gpu; GGML_ASSERT(d_D != nullptr); - uint64_t d_buf_offset = (extra->offset / vk_device.properties.limits.minStorageBufferOffsetAlignment) * vk_device.properties.limits.minStorageBufferOffsetAlignment; + uint64_t d_buf_offset = (extra->offset / ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment) * ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment; GGML_ASSERT(d_buf_offset == extra->offset || op == GGML_OP_CPY); // NOLINT if (transfer_src0) { - d_X = &vk_prealloc_qx; + d_X = ctx->prealloc_qx; } else if(!src0_uma) { - d_X = &extra_src0->buffer_gpu; + d_X = extra_src0->buffer_gpu.lock(); x_buf_offset = extra_src0->offset; GGML_ASSERT(d_X != nullptr); } if (transfer_src1) { - d_Y = &vk_prealloc_qy; + d_Y = ctx->prealloc_qy; } else if (use_src1 && !src1_uma) { - d_Y = &extra_src1->buffer_gpu; + d_Y = extra_src1->buffer_gpu.lock(); y_buf_offset = extra_src1->offset; GGML_ASSERT(d_Y != nullptr); } @@ -2856,16 +3043,16 @@ static void ggml_vk_op_f32(vk_context * ctx, const ggml_tensor * src0, const ggm // copy src0 to device if (transfer_src0) { - ggml_vk_h2d_tensor_2d(ctx, d_X, 0, src0, 0, 0, ggml_nrows(src0)); - vk_staging_offset = x_sz * ne02 * ne03; + ggml_vk_h2d_tensor_2d(ctx, subctx, d_X, 0, src0, 0, 0, ggml_nrows(src0)); + ctx->staging_offset = x_sz * ne02 * ne03; } if (transfer_src1) { - ggml_vk_h2d_tensor_2d(ctx, d_Y, 0, src1, 0, 0, ggml_nrows(src1)); + ggml_vk_h2d_tensor_2d(ctx, subctx, d_Y, 0, src1, 0, 0, ggml_nrows(src1)); } // Single call 
if dimension 2 is contiguous if (op == GGML_OP_CPY || (ggml_is_contiguous(src0) && (src1 == nullptr || ggml_is_contiguous(src1)))) { - ggml_vk_pipeline_allocate_descriptor_sets(*pipeline, 1); + ggml_pipeline_allocate_descriptor_sets(ctx, *pipeline, 1); switch (dst->op) { case GGML_OP_NORM: @@ -2896,24 +3083,24 @@ static void ggml_vk_op_f32(vk_context * ctx, const ggml_tensor * src0, const ggm if (!use_src1 && op == GGML_OP_SOFT_MAX) { // Empty src1 is possible on soft_max, but the shader needs a buffer - ggml_vk_sync_buffers(ctx); - ggml_vk_dispatch_pipeline(ctx, *pipeline, { { *d_X, x_buf_offset, x_sz }, { vk_prealloc_y, 0, vk_prealloc_y.size }, { *d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements); + ggml_vk_sync_buffers(subctx); + ggml_vk_dispatch_pipeline(ctx, subctx, *pipeline, { { d_X, x_buf_offset, x_sz }, { ctx->prealloc_y, 0, ctx->prealloc_y->size }, { d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements); } else if (use_src1) { - ggml_vk_sync_buffers(ctx); - ggml_vk_dispatch_pipeline(ctx, *pipeline, { { *d_X, x_buf_offset, x_sz }, { *d_Y, y_buf_offset, y_sz }, { *d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements); + ggml_vk_sync_buffers(subctx); + ggml_vk_dispatch_pipeline(ctx, subctx, *pipeline, { { d_X, x_buf_offset, x_sz }, { d_Y, y_buf_offset, y_sz }, { d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements); } else { - ggml_vk_sync_buffers(ctx); - ggml_vk_dispatch_pipeline(ctx, *pipeline, { { *d_X, x_buf_offset, x_sz }, { *d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements); + ggml_vk_sync_buffers(subctx); + ggml_vk_dispatch_pipeline(ctx, subctx, *pipeline, { { d_X, x_buf_offset, x_sz }, { d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements); } if (dst->backend == GGML_BACKEND_CPU && op == GGML_OP_CPY) { - ggml_vk_d2h_tensor_2d(ctx, d_D, 0, dst); + ggml_vk_d2h_tensor_2d(ctx, subctx, d_D, 0, dst); } else if(dst->backend == GGML_BACKEND_CPU) { // copy dst to host float * d = (float *) dst->data; - ggml_vk_buffer_read_async(ctx, d_D, 
0, d, d_sz); + ggml_vk_buffer_read_async(ctx, subctx, d_D, 0, d, d_sz); } } else { - ggml_vk_pipeline_allocate_descriptor_sets(*pipeline, ne02 * ne03); + ggml_pipeline_allocate_descriptor_sets(ctx, *pipeline, ne02 * ne03); switch (dst->op) { case GGML_OP_NORM: @@ -2940,60 +3127,60 @@ static void ggml_vk_op_f32(vk_context * ctx, const ggml_tensor * src0, const ggm if (!use_src1 && op == GGML_OP_SOFT_MAX) { // Empty src1 is possible on soft_max, but the shader needs a buffer - ggml_vk_sync_buffers(ctx); - ggml_vk_dispatch_pipeline(ctx, *pipeline, { { *d_X, x_buf_offset, x_sz }, { vk_prealloc_y, 0, vk_prealloc_y.size }, { *d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements); + ggml_vk_sync_buffers(subctx); + ggml_vk_dispatch_pipeline(ctx, subctx, *pipeline, { { d_X, x_buf_offset, x_sz }, { ctx->prealloc_y, 0, ctx->prealloc_y->size }, { d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements); } else if (use_src1) { - ggml_vk_sync_buffers(ctx); - ggml_vk_dispatch_pipeline(ctx, *pipeline, { { *d_X, x_buf_offset + x_offset, x_sz }, { *d_Y, y_buf_offset + y_offset, y_sz }, { *d_D, d_buf_offset + d_offset, d_sz } }, sizeof(PC), &pc, elements); + ggml_vk_sync_buffers(subctx); + ggml_vk_dispatch_pipeline(ctx, subctx, *pipeline, { { d_X, x_buf_offset + x_offset, x_sz }, { d_Y, y_buf_offset + y_offset, y_sz }, { d_D, d_buf_offset + d_offset, d_sz } }, sizeof(PC), &pc, elements); } else { - ggml_vk_sync_buffers(ctx); - ggml_vk_dispatch_pipeline(ctx, *pipeline, { { *d_X, x_buf_offset + x_offset, x_sz }, { *d_D, d_buf_offset + d_offset, d_sz } }, sizeof(PC), &pc, elements); + ggml_vk_sync_buffers(subctx); + ggml_vk_dispatch_pipeline(ctx, subctx, *pipeline, { { d_X, x_buf_offset + x_offset, x_sz }, { d_D, d_buf_offset + d_offset, d_sz } }, sizeof(PC), &pc, elements); } if (dst->backend == GGML_BACKEND_CPU) { // copy dst to host - ggml_vk_buffer_read_async(ctx, d_D, d_buf_offset + d_offset, (char *) dst->data + i02*nb2 + i03*nb3, d_sz); + ggml_vk_buffer_read_async(ctx, subctx, 
d_D, d_buf_offset + d_offset, (char *) dst->data + i02*nb2 + i03*nb3, d_sz); } } } } } -static void ggml_vk_repeat(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - ggml_vk_op_f32(ctx, src0, src1, dst, GGML_OP_REPEAT, { (uint32_t)ggml_nelements(src0), (uint32_t)ggml_nelements(src1), 0.0f, 0.0f }); +static void ggml_vk_repeat(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + ggml_vk_op_f32(ctx, subctx, src0, src1, dst, GGML_OP_REPEAT, { (uint32_t)ggml_nelements(src0), (uint32_t)ggml_nelements(src1), 0.0f, 0.0f }); } -static void ggml_vk_get_rows(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - ggml_vk_op_f32(ctx, src0, src1, dst, GGML_OP_GET_ROWS, { (uint32_t)ggml_nelements(src0), (uint32_t)ggml_nelements(src1), 0.0f, 0.0f }); +static void ggml_vk_get_rows(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + ggml_vk_op_f32(ctx, subctx, src0, src1, dst, GGML_OP_GET_ROWS, { (uint32_t)ggml_nelements(src0), (uint32_t)ggml_nelements(src1), 0.0f, 0.0f }); } -static void ggml_vk_add(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - ggml_vk_op_f32(ctx, src0, src1, dst, GGML_OP_ADD, { (uint32_t)ggml_nelements(src0), (uint32_t)ggml_nelements(src1), 0.0f, 0.0f }); +static void ggml_vk_add(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + ggml_vk_op_f32(ctx, subctx, src0, src1, dst, GGML_OP_ADD, { (uint32_t)ggml_nelements(src0), (uint32_t)ggml_nelements(src1), 0.0f, 0.0f }); } -static void ggml_vk_mul(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - ggml_vk_op_f32(ctx, src0, src1, dst, GGML_OP_MUL, { (uint32_t)ggml_nelements(src0), (uint32_t)ggml_nelements(src1), 0.0f, 0.0f }); 
+static void ggml_vk_mul(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + ggml_vk_op_f32(ctx, subctx, src0, src1, dst, GGML_OP_MUL, { (uint32_t)ggml_nelements(src0), (uint32_t)ggml_nelements(src1), 0.0f, 0.0f }); } -static void ggml_vk_scale(vk_context * ctx, const ggml_tensor * src0, ggml_tensor * dst) { +static void ggml_vk_scale(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, ggml_tensor * dst) { float * op_params = (float *)dst->op_params; - ggml_vk_op_f32(ctx, src0, nullptr, dst, GGML_OP_SCALE, { (uint32_t)ggml_nelements(src0), 0, op_params[0], 0.0f }); + ggml_vk_op_f32(ctx, subctx, src0, nullptr, dst, GGML_OP_SCALE, { (uint32_t)ggml_nelements(src0), 0, op_params[0], 0.0f }); } -static void ggml_vk_sqr(vk_context * ctx, const ggml_tensor * src0, ggml_tensor * dst) { - ggml_vk_op_f32(ctx, src0, nullptr, dst, GGML_OP_SQR, { (uint32_t)ggml_nelements(src0), 0, 0.0f, 0.0f }); +static void ggml_vk_sqr(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, ggml_tensor * dst) { + ggml_vk_op_f32(ctx, subctx, src0, nullptr, dst, GGML_OP_SQR, { (uint32_t)ggml_nelements(src0), 0, 0.0f, 0.0f }); } -static void ggml_vk_clamp(vk_context * ctx, const ggml_tensor * src0, ggml_tensor * dst) { +static void ggml_vk_clamp(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, ggml_tensor * dst) { float * op_params = (float *)dst->op_params; - ggml_vk_op_f32(ctx, src0, nullptr, dst, GGML_OP_CLAMP, { (uint32_t)ggml_nelements(src0), 0, op_params[0], op_params[1] }); + ggml_vk_op_f32(ctx, subctx, src0, nullptr, dst, GGML_OP_CLAMP, { (uint32_t)ggml_nelements(src0), 0, op_params[0], op_params[1] }); } -static void ggml_vk_cpy(vk_context * ctx, const ggml_tensor * src0, ggml_tensor * dst) { +static void ggml_vk_cpy(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, ggml_tensor * dst) { ggml_tensor_extra_gpu * 
extra = (ggml_tensor_extra_gpu *) dst->extra; const int src0_type_size = ggml_type_size(src0->type); const int dst_type_size = ggml_type_size(dst->type); - const uint32_t d_offset = (extra->offset % vk_device.properties.limits.minStorageBufferOffsetAlignment) / dst_type_size; - ggml_vk_op_f32(ctx, src0, nullptr, dst, GGML_OP_CPY, { + const uint32_t d_offset = (extra->offset % ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment) / dst_type_size; + ggml_vk_op_f32(ctx, subctx, src0, nullptr, dst, GGML_OP_CPY, { (uint32_t)ggml_nelements(src0), (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], (uint32_t)src0->nb[0] / src0_type_size, (uint32_t)src0->nb[1] / src0_type_size, (uint32_t)src0->nb[2] / src0_type_size, (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, @@ -3001,30 +3188,30 @@ static void ggml_vk_cpy(vk_context * ctx, const ggml_tensor * src0, ggml_tensor }); } -static void ggml_vk_norm(vk_context * ctx, const ggml_tensor * src0, ggml_tensor * dst) { - ggml_vk_op_f32(ctx, src0, nullptr, dst, GGML_OP_NORM, { (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], 0.0f, 0.0f }); +static void ggml_vk_norm(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, ggml_tensor * dst) { + ggml_vk_op_f32(ctx, subctx, src0, nullptr, dst, GGML_OP_NORM, { (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], 0.0f, 0.0f }); } -static void ggml_vk_rms_norm(vk_context * ctx, const ggml_tensor * src0, ggml_tensor * dst) { +static void ggml_vk_rms_norm(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, ggml_tensor * dst) { float * op_params = (float *)dst->op_params; - ggml_vk_op_f32(ctx, src0, nullptr, dst, GGML_OP_RMS_NORM, { (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], op_params[0], 0.0f }); + ggml_vk_op_f32(ctx, subctx, src0, nullptr, dst, GGML_OP_RMS_NORM, { (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], op_params[0], 
0.0f }); } -static void ggml_vk_unary(vk_context * ctx, const ggml_tensor * src0, ggml_tensor * dst) { - ggml_vk_op_f32(ctx, src0, nullptr, dst, GGML_OP_UNARY, { (uint32_t)ggml_nelements(src0), 0, 0.0f, 0.0f }); +static void ggml_vk_unary(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, ggml_tensor * dst) { + ggml_vk_op_f32(ctx, subctx, src0, nullptr, dst, GGML_OP_UNARY, { (uint32_t)ggml_nelements(src0), 0, 0.0f, 0.0f }); } -static void ggml_vk_diag_mask_inf(vk_context * ctx, const ggml_tensor * src0, ggml_tensor * dst) { +static void ggml_vk_diag_mask_inf(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, ggml_tensor * dst) { int32_t * op_params = (int32_t *)dst->op_params; - ggml_vk_op_f32(ctx, src0, nullptr, dst, GGML_OP_DIAG_MASK_INF, { (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], op_params[0] }); + ggml_vk_op_f32(ctx, subctx, src0, nullptr, dst, GGML_OP_DIAG_MASK_INF, { (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], op_params[0] }); } -static void ggml_vk_soft_max(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +static void ggml_vk_soft_max(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { float * op_params = (float *)dst->op_params; - ggml_vk_op_f32(ctx, src0, src1, dst, GGML_OP_SOFT_MAX, { (uint32_t)src0->ne[0], (uint32_t)(src1 != nullptr ? ggml_nrows(src1) : 0), op_params[0], 0.0f }); + ggml_vk_op_f32(ctx, subctx, src0, src1, dst, GGML_OP_SOFT_MAX, { (uint32_t)src0->ne[0], (uint32_t)(src1 != nullptr ? 
ggml_nrows(src1) : 0), op_params[0], 0.0f }); } -static void ggml_vk_rope(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +static void ggml_vk_rope(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { const int n_dims = ((int32_t *) dst->op_params)[1]; const int mode = ((int32_t *) dst->op_params)[2]; // const int n_ctx = ((int32_t *) dst->op_params)[3]; @@ -3047,19 +3234,19 @@ static void ggml_vk_rope(vk_context * ctx, const ggml_tensor * src0, const ggml_ if (is_neox) { const float theta_scale = powf(freq_base, -2.0f/n_dims); const float inv_ndims = -1.0f / n_dims; - ggml_vk_op_f32(ctx, src0, src1, dst, GGML_OP_ROPE, { (uint32_t)src0->ne[0], (uint32_t)n_dims, freq_scale, (uint32_t)src0->ne[1], freq_base, ext_factor, attn_factor, corr_dims[0], corr_dims[1], 0.0f, 0.0f, theta_scale, inv_ndims }); + ggml_vk_op_f32(ctx, subctx, src0, src1, dst, GGML_OP_ROPE, { (uint32_t)src0->ne[0], (uint32_t)n_dims, freq_scale, (uint32_t)src0->ne[1], freq_base, ext_factor, attn_factor, corr_dims[0], corr_dims[1], 0.0f, 0.0f, theta_scale, inv_ndims }); } else { - ggml_vk_op_f32(ctx, src0, src1, dst, GGML_OP_ROPE, { (uint32_t)src0->ne[0], freq_scale, (uint32_t)src0->ne[1], freq_base, ext_factor, attn_factor, corr_dims[0], corr_dims[1], 0.0f, 0.0f }); + ggml_vk_op_f32(ctx, subctx, src0, src1, dst, GGML_OP_ROPE, { (uint32_t)src0->ne[0], freq_scale, (uint32_t)src0->ne[1], freq_base, ext_factor, attn_factor, corr_dims[0], corr_dims[1], 0.0f, 0.0f }); } } -static void ggml_vk_nop(vk_context * ctx, const ggml_tensor * src0, ggml_tensor * dst) { +static void ggml_vk_nop(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, ggml_tensor * dst) { // If backend is CPU, data from src0 has to be copied off the device if (dst->backend == GGML_BACKEND_CPU) { ggml_tensor_extra_gpu * extra_src0 = (ggml_tensor_extra_gpu *) src0->extra; - vk_buffer * d_D = 
&extra_src0->buffer_gpu; - ggml_vk_sync_buffers(ctx); - ggml_vk_buffer_read_async(ctx, d_D, 0, dst->data, d_D->size); + vk_buffer d_D = extra_src0->buffer_gpu.lock(); + ggml_vk_sync_buffers(subctx); + ggml_vk_buffer_read_async(ctx, subctx, d_D, 0, dst->data, d_D->size); } } @@ -3096,7 +3283,7 @@ static void ggml_vk_print_matrix_area(const void * data, ggml_type type, int ne0 } template -static void ggml_vk_test_matmul(size_t m, size_t n, size_t k, size_t batch, size_t num_it, int split_k, int shader_size) { +static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t n, size_t k, size_t batch, size_t num_it, int split_k, int shader_size) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_test_matmul(" << m << ", " << n << ", " << k << ", " << batch << ", " << num_it << ", " << split_k << ", " << shader_size << ")" << std::endl; #endif @@ -3108,39 +3295,39 @@ static void ggml_vk_test_matmul(size_t m, size_t n, size_t k, size_t batch, size std::string shname; if (shader_size == 0) { if (std::is_same() && std::is_same()) { - p = &vk_pipeline_matmul_f32_aligned_s; + p = &ctx->pipeline_matmul_f32_aligned_s; shname = "F32_ALIGNED_S"; } else if (std::is_same() && std::is_same()) { - p = &vk_pipeline_matmul_f16_f32_aligned_s; + p = &ctx->pipeline_matmul_f16_f32_aligned_s; shname = "F16_F32_ALIGNED_S"; } else if (std::is_same() && std::is_same()) { - p = &vk_pipeline_matmul_f16_aligned_s; + p = &ctx->pipeline_matmul_f16_aligned_s; shname = "F16_ALIGNED_S"; } else { GGML_ASSERT(false); } } else if (shader_size == 1) { if (std::is_same() && std::is_same()) { - p = &vk_pipeline_matmul_f32_aligned_m; + p = &ctx->pipeline_matmul_f32_aligned_m; shname = "F32_ALIGNED_M"; } else if (std::is_same() && std::is_same()) { - p = &vk_pipeline_matmul_f16_f32_aligned_m; + p = &ctx->pipeline_matmul_f16_f32_aligned_m; shname = "F16_F32_ALIGNED_M"; } else if (std::is_same() && std::is_same()) { - p = &vk_pipeline_matmul_f16_aligned_m; + p = 
&ctx->pipeline_matmul_f16_aligned_m; shname = "F16_ALIGNED_M"; } else { GGML_ASSERT(false); } } else if (shader_size == 2) { if (std::is_same() && std::is_same()) { - p = &vk_pipeline_matmul_f32_aligned_l; + p = &ctx->pipeline_matmul_f32_aligned_l; shname = "F32_ALIGNED_L"; } else if (std::is_same() && std::is_same()) { - p = &vk_pipeline_matmul_f16_f32_aligned_l; + p = &ctx->pipeline_matmul_f16_f32_aligned_l; shname = "F16_F32_ALIGNED_L"; } else if (std::is_same() && std::is_same()) { - p = &vk_pipeline_matmul_f16_aligned_l; + p = &ctx->pipeline_matmul_f16_aligned_l; shname = "F16_ALIGNED_L"; } else { GGML_ASSERT(false); @@ -3154,56 +3341,56 @@ static void ggml_vk_test_matmul(size_t m, size_t n, size_t k, size_t batch, size if (k != kpad) { if (shader_size == 0) { if (std::is_same() && std::is_same()) { - p = &vk_pipeline_matmul_f32_s; + p = &ctx->pipeline_matmul_f32_s; shname = "F32_S"; } else if (std::is_same() && std::is_same()) { - p = &vk_pipeline_matmul_f16_f32_s; + p = &ctx->pipeline_matmul_f16_f32_s; shname = "F16_F32_S"; } else if (std::is_same() && std::is_same()) { - p = &vk_pipeline_matmul_f16_s; + p = &ctx->pipeline_matmul_f16_s; shname = "F16_S"; } } else if (shader_size == 1) { if (std::is_same() && std::is_same()) { - p = &vk_pipeline_matmul_f32_m; + p = &ctx->pipeline_matmul_f32_m; shname = "F32_M"; } else if (std::is_same() && std::is_same()) { - p = &vk_pipeline_matmul_f16_f32_m; + p = &ctx->pipeline_matmul_f16_f32_m; shname = "F16_F32_M"; } else if (std::is_same() && std::is_same()) { - p = &vk_pipeline_matmul_f16_m; + p = &ctx->pipeline_matmul_f16_m; shname = "F16_M"; } } else if (shader_size == 2) { if (std::is_same() && std::is_same()) { - p = &vk_pipeline_matmul_f32_l; + p = &ctx->pipeline_matmul_f32_l; shname = "F32_L"; } else if (std::is_same() && std::is_same()) { - p = &vk_pipeline_matmul_f16_f32_l; + p = &ctx->pipeline_matmul_f16_f32_l; shname = "F16_F32_L"; } else if (std::is_same() && std::is_same()) { - p = 
&vk_pipeline_matmul_f16_l; + p = &ctx->pipeline_matmul_f16_l; shname = "F16_L"; } } } - ggml_vk_pipeline_allocate_descriptor_sets(*p, num_it); + ggml_pipeline_allocate_descriptor_sets(ctx, *p, num_it); if (split_k > 1) { - ggml_vk_pipeline_allocate_descriptor_sets(vk_pipeline_matmul_split_k_reduce, num_it); + ggml_pipeline_allocate_descriptor_sets(ctx, ctx->pipeline_matmul_split_k_reduce, num_it); - if (vk_prealloc_split_k.size < sizeof(float) * d_ne * split_k) { + if (ctx->prealloc_split_k == nullptr || ctx->prealloc_split_k->size < sizeof(float) * d_ne * split_k) { // Resize buffer - if (vk_prealloc_split_k.size > 0) { - ggml_vk_destroy_buffer(vk_prealloc_split_k); + if (ctx->prealloc_split_k != nullptr) { + ggml_vk_destroy_buffer(ctx->prealloc_split_k); } - vk_prealloc_split_k = ggml_vk_create_buffer_check(sizeof(float) * d_ne * split_k, vk::MemoryPropertyFlagBits::eDeviceLocal); + ctx->prealloc_split_k = ggml_vk_create_buffer_check(ctx, sizeof(float) * d_ne * split_k, vk::MemoryPropertyFlagBits::eDeviceLocal); } } - vk_buffer d_X = ggml_vk_create_buffer_check(sizeof(X_TYPE) * x_ne, vk::MemoryPropertyFlagBits::eDeviceLocal); - vk_buffer d_Y = ggml_vk_create_buffer_check(sizeof(Y_TYPE) * y_ne, vk::MemoryPropertyFlagBits::eDeviceLocal); - vk_buffer d_D = ggml_vk_create_buffer_check(sizeof(float) * d_ne, vk::MemoryPropertyFlagBits::eDeviceLocal); + vk_buffer d_X = ggml_vk_create_buffer_check(ctx, sizeof(X_TYPE) * x_ne, vk::MemoryPropertyFlagBits::eDeviceLocal); + vk_buffer d_Y = ggml_vk_create_buffer_check(ctx, sizeof(Y_TYPE) * y_ne, vk::MemoryPropertyFlagBits::eDeviceLocal); + vk_buffer d_D = ggml_vk_create_buffer_check(ctx, sizeof(float) * d_ne, vk::MemoryPropertyFlagBits::eDeviceLocal); X_TYPE* x = (X_TYPE *) malloc(sizeof(X_TYPE) * x_ne); Y_TYPE* y = (Y_TYPE *) malloc(sizeof(Y_TYPE) * y_ne); @@ -3228,26 +3415,26 @@ static void ggml_vk_test_matmul(size_t m, size_t n, size_t k, size_t batch, size } } - ggml_vk_buffer_write(&d_X, 0, x, sizeof(X_TYPE) * k * m * 
batch); - ggml_vk_buffer_write(&d_Y, 0, y, sizeof(Y_TYPE) * k * n * batch); + ggml_vk_buffer_write(ctx, d_X, 0, x, sizeof(X_TYPE) * k * m * batch); + ggml_vk_buffer_write(ctx, d_Y, 0, y, sizeof(Y_TYPE) * k * n * batch); - vk_context * ctx = ggml_vk_create_context(vk_device.compute_queue); + vk_context * subctx = ggml_vk_create_context(ctx, ctx->device.lock()->compute_queue); for (size_t i = 0; i < num_it; i++) { - ggml_vk_ctx_begin(ctx); - ggml_vk_matmul(ctx, *p, ggml_vk_subbuffer(d_X), ggml_vk_subbuffer(d_Y), ggml_vk_subbuffer(d_D), ggml_vk_subbuffer(vk_prealloc_split_k), m, n, k, k, k, m, split_k, batch, batch, batch, 1, 1, k*m, k*n, m*n); - ggml_vk_ctx_end(ctx); + ggml_vk_ctx_begin(ctx, subctx); + ggml_vk_matmul(ctx, subctx, *p, ggml_vk_subbuffer(d_X), ggml_vk_subbuffer(d_Y), ggml_vk_subbuffer(d_D), ggml_vk_subbuffer(ctx->prealloc_split_k), m, n, k, k, k, m, split_k, batch, batch, batch, 1, 1, k*m, k*n, m*n); + ggml_vk_ctx_end(subctx); } auto begin = std::chrono::high_resolution_clock::now(); - ggml_vk_submit(ctx, vk_fence); - VK_CHECK(vk_device.device.waitForFences({ vk_fence }, true, UINT64_MAX), "ggml_vk_test_matmul waitForFences"); - vk_device.device.resetFences({ vk_fence }); + ggml_vk_submit(subctx, ctx->fence); + VK_CHECK(ctx->device.lock()->device.waitForFences({ ctx->fence }, true, UINT64_MAX), "ggml_vk_test_matmul waitForFences"); + ctx->device.lock()->device.resetFences({ ctx->fence }); auto end = std::chrono::high_resolution_clock::now(); double time = std::chrono::duration_cast(end-begin).count() / 1000.0; // copy dst to host - ggml_vk_buffer_read(&d_D, 0, d, sizeof(float) * d_ne); + ggml_vk_buffer_read(ctx, d_D, 0, d, sizeof(float) * d_ne); float * d_chk = (float *) malloc(sizeof(float) * d_ne); @@ -3285,14 +3472,14 @@ static void ggml_vk_test_matmul(size_t m, size_t n, size_t k, size_t batch, size src1_ggml->data = y; tensor_ggml->data = d_chk; - vk_disable = true; + ctx->disable = true; ggml_cgraph * cgraph = ggml_new_graph(ggml_ctx); 
ggml_build_forward_expand(cgraph, tensor_ggml); ggml_graph_compute_with_ctx(ggml_ctx, cgraph, 1); - vk_disable = false; + ctx->disable = false; ggml_free(ggml_ctx); @@ -3325,7 +3512,7 @@ static void ggml_vk_test_matmul(size_t m, size_t n, size_t k, size_t batch, size if (split_k > 1) { float * split_k_buf = (float *) malloc(sizeof(float) * d_ne * split_k); - ggml_vk_buffer_read(&vk_prealloc_split_k, 0, split_k_buf, sizeof(float) * d_ne * split_k); + ggml_vk_buffer_read(ctx, ctx->prealloc_split_k, 0, split_k_buf, sizeof(float) * d_ne * split_k); std::cerr << "d_buf0: " << std::endl << std::endl; ggml_vk_print_matrix_area(split_k_buf, GGML_TYPE_F32, m, n, first_err_m, first_err_n, first_err_b); @@ -3345,15 +3532,15 @@ static void ggml_vk_test_matmul(size_t m, size_t n, size_t k, size_t batch, size free(d_chk); - ggml_vk_queue_cleanup(vk_device.transfer_queue); - ggml_vk_queue_cleanup(vk_device.compute_queue); + ggml_vk_queue_cleanup(ctx, ctx->device.lock()->transfer_queue); + ggml_vk_queue_cleanup(ctx, ctx->device.lock()->compute_queue); ggml_vk_destroy_buffer(d_X); ggml_vk_destroy_buffer(d_Y); ggml_vk_destroy_buffer(d_D); - ggml_vk_pipeline_cleanup(*p); - ggml_vk_pipeline_cleanup(vk_pipeline_matmul_split_k_reduce); + ggml_pipeline_cleanup(*p); + ggml_pipeline_cleanup(ctx->pipeline_matmul_split_k_reduce); free(x); free(y); @@ -3392,7 +3579,7 @@ static void ggml_vk_print_tensor_area(const ggml_tensor * tensor, int i0, int i1 } } -static void ggml_vk_test_h2d_nc(size_t ne0, size_t ne1, size_t ne2, size_t ne3) { +static void ggml_vk_test_h2d_nc(ggml_backend_vk_context * ctx, size_t ne0, size_t ne1, size_t ne2, size_t ne3) { const size_t ne = ne0 * ne1 * ne2 * ne3; ggml_init_params iparams = { @@ -3406,7 +3593,7 @@ static void ggml_vk_test_h2d_nc(size_t ne0, size_t ne1, size_t ne2, size_t ne3) ggml_tensor * tensor = ggml_new_tensor_4d(ggml_ctx, GGML_TYPE_F32, ne0, ne2, ne1, ne3); // NOLINT ggml_tensor * result_tensor = ggml_new_tensor_4d(ggml_ctx, GGML_TYPE_F32, ne0, 
ne1, ne2, ne3); - float * data = (float *) ggml_vk_host_malloc(ggml_nbytes(tensor)); + float * data = (float *) ggml_vk_host_malloc(ctx, ggml_nbytes(tensor)); tensor->data = data; float * result_data = (float *) malloc(ggml_nbytes(tensor)); @@ -3426,19 +3613,19 @@ static void ggml_vk_test_h2d_nc(size_t ne0, size_t ne1, size_t ne2, size_t ne3) data[i] = (rand() / (float)RAND_MAX) * 2.0f - 1.0f; } - vk_context * ctx = ggml_vk_create_context(vk_device.compute_queue); - ggml_vk_ctx_begin(ctx); + vk_context * subctx = ggml_vk_create_context(ctx, ctx->device.lock()->compute_queue); + ggml_vk_ctx_begin(ctx, subctx); - vk_buffer buffer = ggml_vk_create_buffer_check(ggml_nbytes(tensor), vk::MemoryPropertyFlagBits::eDeviceLocal); + vk_buffer buffer = ggml_vk_create_buffer_check(ctx, ggml_nbytes(tensor), vk::MemoryPropertyFlagBits::eDeviceLocal); - ggml_vk_h2d_tensor_2d(ctx, &buffer, 0, tensor, 0, 0, ggml_nrows(tensor)); + ggml_vk_h2d_tensor_2d(ctx, subctx, buffer, 0, tensor, 0, 0, ggml_nrows(tensor)); - ggml_vk_ctx_end(ctx); - ggml_vk_submit(ctx, vk_fence); - VK_CHECK(vk_device.device.waitForFences({ vk_fence }, true, UINT64_MAX), "ggml_vk_compute_forward waitForFences"); - vk_device.device.resetFences({ vk_fence }); + ggml_vk_ctx_end(subctx); + ggml_vk_submit(subctx, ctx->fence); + VK_CHECK(ctx->device.lock()->device.waitForFences({ ctx->fence }, true, UINT64_MAX), "ggml_vk_test_h2d_nc waitForFences"); + ctx->device.lock()->device.resetFences({ ctx->fence }); - ggml_vk_buffer_read(&buffer, 0, result_data, ggml_nbytes(tensor)); + ggml_vk_buffer_read(ctx, buffer, 0, result_data, ggml_nbytes(tensor)); double avg_err = 0.0; int first_err_i0 = -1; @@ -3483,22 +3670,22 @@ static void ggml_vk_test_h2d_nc(size_t ne0, size_t ne1, size_t ne2, size_t ne3) ggml_vk_destroy_buffer(buffer); - ggml_vk_host_free(data); + ggml_vk_host_free(ctx, data); free(result_data); } -static void ggml_vk_test_transfer(size_t ne, bool pinned) { +static void ggml_vk_test_transfer(ggml_backend_vk_context * 
ctx, size_t ne, bool pinned) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_test_transfer(" << ne << ")" << std::endl; #endif // Check transfers are correct - vk_buffer buffer = ggml_vk_create_buffer_check(sizeof(float) * ne, vk::MemoryPropertyFlagBits::eDeviceLocal); + vk_buffer buffer = ggml_vk_create_buffer_check(ctx, sizeof(float) * ne, vk::MemoryPropertyFlagBits::eDeviceLocal); float * x; float * y; if (pinned) { - x = (float *) ggml_vk_host_malloc(sizeof(float) * ne); - y = (float *) ggml_vk_host_malloc(sizeof(float) * ne); + x = (float *) ggml_vk_host_malloc(ctx, sizeof(float) * ne); + y = (float *) ggml_vk_host_malloc(ctx, sizeof(float) * ne); } else { x = (float *) malloc(sizeof(float) * ne); y = (float *) malloc(sizeof(float) * ne); @@ -3508,42 +3695,42 @@ static void ggml_vk_test_transfer(size_t ne, bool pinned) { x[i] = rand() / (float)RAND_MAX; } - vk_context * ctx = ggml_vk_create_context(vk_device.compute_queue); - ggml_vk_ctx_begin(ctx); + vk_context * subctx = ggml_vk_create_context(ctx, ctx->device.lock()->compute_queue); + ggml_vk_ctx_begin(ctx, subctx); auto begin = std::chrono::high_resolution_clock::now(); - ggml_vk_buffer_write_async(ctx, &buffer, 0, x, sizeof(float) * ne); + ggml_vk_buffer_write_async(ctx, subctx, buffer, 0, x, sizeof(float) * ne); - for (auto& cpy : ctx->in_memcpys) { + for (auto& cpy : subctx->in_memcpys) { memcpy(cpy.dst, cpy.src, cpy.n); } - ctx->in_memcpys.clear(); + subctx->in_memcpys.clear(); - ggml_vk_ctx_end(ctx); - ggml_vk_submit(ctx, vk_fence); - VK_CHECK(vk_device.device.waitForFences({ vk_fence }, true, UINT64_MAX), "ggml_vk_compute_forward waitForFences"); - vk_device.device.resetFences({ vk_fence }); + ggml_vk_ctx_end(subctx); + ggml_vk_submit(subctx, ctx->fence); + VK_CHECK(ctx->device.lock()->device.waitForFences({ ctx->fence }, true, UINT64_MAX), "ggml_vk_test_transfer waitForFences"); + ctx->device.lock()->device.resetFences({ ctx->fence }); auto end = std::chrono::high_resolution_clock::now(); double 
ms_to_gpu = std::chrono::duration_cast(end-begin).count() / 1000.0; - ggml_vk_ctx_begin(ctx); + ggml_vk_ctx_begin(ctx, subctx); begin = std::chrono::high_resolution_clock::now(); - ggml_vk_buffer_read_async(ctx, &buffer, 0, y, sizeof(float) * ne); + ggml_vk_buffer_read_async(ctx, subctx, buffer, 0, y, sizeof(float) * ne); - ggml_vk_ctx_end(ctx); - ggml_vk_submit(ctx, vk_fence); - VK_CHECK(vk_device.device.waitForFences({ vk_fence }, true, UINT64_MAX), "ggml_vk_compute_forward waitForFences"); - vk_device.device.resetFences({ vk_fence }); + ggml_vk_ctx_end(subctx); + ggml_vk_submit(subctx, ctx->fence); + VK_CHECK(ctx->device.lock()->device.waitForFences({ ctx->fence }, true, UINT64_MAX), "ggml_vk_test_transfer waitForFences"); + ctx->device.lock()->device.resetFences({ ctx->fence }); - for (auto& cpy : ctx->out_memcpys) { + for (auto& cpy : subctx->out_memcpys) { memcpy(cpy.dst, cpy.src, cpy.n); } - ctx->out_memcpys.clear(); + subctx->out_memcpys.clear(); end = std::chrono::high_resolution_clock::now(); @@ -3561,15 +3748,15 @@ static void ggml_vk_test_transfer(size_t ne, bool pinned) { ggml_vk_destroy_buffer(buffer); if (pinned) { - ggml_vk_host_free(x); - ggml_vk_host_free(y); + ggml_vk_host_free(ctx, x); + ggml_vk_host_free(ctx, y); } else { free(x); free(y); } } -static void ggml_vk_test_dequant(size_t ne, ggml_type quant) { +static void ggml_vk_test_dequant(ggml_backend_vk_context * ctx, size_t ne, ggml_type quant) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_test_dequant(" << ne << ")" << std::endl; #endif @@ -3578,8 +3765,8 @@ static void ggml_vk_test_dequant(size_t ne, ggml_type quant) { const size_t qx_sz = ne * ggml_type_size(quant)/ggml_blck_size(quant); float * x = (float *) malloc(x_sz); void * qx = malloc(qx_sz); - vk_buffer qx_buf = ggml_vk_create_buffer_check(qx_sz, vk::MemoryPropertyFlagBits::eDeviceLocal); - vk_buffer x_buf = ggml_vk_create_buffer_check(x_sz_f16, vk::MemoryPropertyFlagBits::eDeviceLocal); + vk_buffer qx_buf = 
ggml_vk_create_buffer_check(ctx, qx_sz, vk::MemoryPropertyFlagBits::eDeviceLocal); + vk_buffer x_buf = ggml_vk_create_buffer_check(ctx, x_sz_f16, vk::MemoryPropertyFlagBits::eDeviceLocal); ggml_fp16_t * x_chk = (ggml_fp16_t *) malloc(x_sz_f16); for (size_t i = 0; i < ne; i++) { @@ -3588,7 +3775,7 @@ static void ggml_vk_test_dequant(size_t ne, ggml_type quant) { std::vector hist_cur(1 << 4, 0); - vk_pipeline& p = vk_pipeline_dequant[quant]; + vk_pipeline& p = ctx->pipeline_dequant[quant]; switch(quant) { case GGML_TYPE_Q4_0: @@ -3625,27 +3812,26 @@ static void ggml_vk_test_dequant(size_t ne, ggml_type quant) { GGML_ASSERT(false); } - ggml_vk_pipeline_allocate_descriptor_sets(p, 1); + ggml_pipeline_allocate_descriptor_sets(ctx, p, 1); - ggml_vk_buffer_write(&qx_buf, 0, qx, qx_sz); + ggml_vk_buffer_write(ctx, qx_buf, 0, qx, qx_sz); - vk_context * ctx = ggml_vk_create_context(vk_device.compute_queue); - ggml_vk_ctx_begin(ctx); + vk_context * subctx = ggml_vk_create_context(ctx, ctx->device.lock()->compute_queue); + ggml_vk_ctx_begin(ctx, subctx); const std::vector pc = { 1, (int)ne, (int)ne, (int)ne }; - ggml_vk_sync_buffers(ctx); - ggml_vk_dispatch_pipeline(ctx, p, { { qx_buf, 0, qx_sz }, { x_buf, 0, x_sz_f16 } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)ne, 1, 1}); - ggml_vk_ctx_end(ctx); + ggml_vk_dispatch_pipeline(ctx, subctx, p, { { qx_buf, 0, qx_sz }, { x_buf, 0, x_sz_f16 } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)ne, 1, 1}); + ggml_vk_ctx_end(subctx); auto begin = std::chrono::high_resolution_clock::now(); - ggml_vk_submit(ctx, vk_fence); - VK_CHECK(vk_device.device.waitForFences({ vk_fence }, true, UINT64_MAX), "ggml_vk_compute_forward waitForFences"); - vk_device.device.resetFences({ vk_fence }); + ggml_vk_submit(subctx, ctx->fence); + VK_CHECK(ctx->device.lock()->device.waitForFences({ ctx->fence }, true, UINT64_MAX), "ggml_vk_test_dequant waitForFences"); + ctx->device.lock()->device.resetFences({ ctx->fence }); auto end = 
std::chrono::high_resolution_clock::now(); double ms_dequant = std::chrono::duration_cast(end-begin).count() / 1000.0; - ggml_vk_buffer_read(&x_buf, 0, x_chk, x_sz_f16); + ggml_vk_buffer_read(ctx, x_buf, 0, x_chk, x_sz_f16); double avg_err = 0.0; for (size_t i = 0; i < ne; i++) { @@ -3687,15 +3873,15 @@ static ggml_tensor * ggml_vk_find_last_use(const ggml_tensor * node, ggml_cgraph return nullptr; } -void ggml_vk_preallocate_buffers_graph(ggml_tensor * node){ +static void ggml_vk_preallocate_buffers_graph(ggml_backend_vk_context * ctx, ggml_tensor * node){ #ifdef GGML_VULKAN_DEBUG - std::cerr << "ggml_vk_preallocate_buffers_graph(" << node << ")" << std::endl; + std::cerr << "ggml_ctx->preallocate_buffers_graph(" << node << ")" << std::endl; #endif const bool any_on_device = node->backend == GGML_BACKEND_GPU || (node->src[0] != nullptr && (node->src[0]->backend == GGML_BACKEND_GPU || node->src[0]->backend == GGML_BACKEND_GPU_SPLIT)) || (node->src[1] != nullptr && (node->src[1]->backend == GGML_BACKEND_GPU)); - if (vk_disable || (!any_on_device && node->op != GGML_OP_MUL_MAT)) { + if (ctx->disable || (!any_on_device && node->op != GGML_OP_MUL_MAT)) { return; } @@ -3735,16 +3921,16 @@ void ggml_vk_preallocate_buffers_graph(ggml_tensor * node){ const uint32_t y_ne = ne10 * ne11; const uint32_t d_ne = ne20 * ne21; - const uint64_t qx_sz = use_src0 ? ggml_vk_align_size(ggml_type_size(src0->type) * x_ne / ggml_blck_size(src0->type), vk_device.properties.limits.minStorageBufferOffsetAlignment) * ne02 * ne03 : 0; - const uint64_t qy_sz = use_src1 ? ggml_vk_align_size(ggml_type_size(src1->type) * y_ne / ggml_blck_size(src1->type), vk_device.properties.limits.minStorageBufferOffsetAlignment) * ne12 * ne13 : 0; - const uint64_t x_sz = use_src0 ? ggml_vk_align_size(sizeof(ggml_fp16_t) * x_ne, vk_device.properties.limits.minStorageBufferOffsetAlignment) * ne02 * ne03 : 0; - const uint64_t y_sz = use_src1 ? ggml_vk_align_size(f16_f32_kernel ? 
sizeof(float) * y_ne : sizeof(ggml_fp16_t) * y_ne, vk_device.properties.limits.minStorageBufferOffsetAlignment) * ne12 * ne13 : 0; - uint64_t d_sz = ggml_vk_align_size(ggml_type_size(node->type) * d_ne, vk_device.properties.limits.minStorageBufferOffsetAlignment) * ne22 * ne23; + const uint64_t qx_sz = use_src0 ? ggml_vk_align_size(ggml_type_size(src0->type) * x_ne / ggml_blck_size(src0->type), ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment) * ne02 * ne03 : 0; + const uint64_t qy_sz = use_src1 ? ggml_vk_align_size(ggml_type_size(src1->type) * y_ne / ggml_blck_size(src1->type), ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment) * ne12 * ne13 : 0; + const uint64_t x_sz = use_src0 ? ggml_vk_align_size(sizeof(ggml_fp16_t) * x_ne, ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment) * ne02 * ne03 : 0; + const uint64_t y_sz = use_src1 ? ggml_vk_align_size(f16_f32_kernel ? sizeof(float) * y_ne : sizeof(ggml_fp16_t) * y_ne, ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment) * ne12 * ne13 : 0; + uint64_t d_sz = ggml_vk_align_size(ggml_type_size(node->type) * d_ne, ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment) * ne22 * ne23; const uint64_t split_k_size = split_k > 1 ? 
d_sz * 4 : 0; - if (extra->buffer_gpu.size == 0) { + if (extra->buffer_gpu.expired()) { // Workaround for CPU backend BLAS matmul calls - extra->buffer_gpu = ggml_vk_create_buffer_temp(d_sz); + extra->buffer_gpu = ggml_vk_create_buffer_temp(ctx, d_sz); } switch (node->op) { @@ -3779,23 +3965,23 @@ void ggml_vk_preallocate_buffers_graph(ggml_tensor * node){ } break; case GGML_OP_MUL_MAT: - if (vk_prealloc_size_qx < qx_sz) { - vk_prealloc_size_qx = qx_sz; + if (ctx->prealloc_size_qx < qx_sz) { + ctx->prealloc_size_qx = qx_sz; } - if (vk_prealloc_size_qy < qy_sz) { - vk_prealloc_size_qy = qy_sz; + if (ctx->prealloc_size_qy < qy_sz) { + ctx->prealloc_size_qy = qy_sz; } - if (vk_prealloc_size_x < x_sz) { - vk_prealloc_size_x = x_sz; + if (ctx->prealloc_size_x < x_sz) { + ctx->prealloc_size_x = x_sz; } - if (vk_prealloc_size_y < y_sz) { - vk_prealloc_size_y = y_sz; + if (ctx->prealloc_size_y < y_sz) { + ctx->prealloc_size_y = y_sz; } - if (vk_prealloc_size_split_k < split_k_size) { - vk_prealloc_size_split_k = split_k_size; + if (ctx->prealloc_size_split_k < split_k_size) { + ctx->prealloc_size_split_k = split_k_size; } - if (vk_staging_size < x_sz + y_sz) { - vk_staging_size = x_sz + y_sz; + if (ctx->staging_size < x_sz + y_sz) { + ctx->staging_size = x_sz + y_sz; } break; default: @@ -3803,29 +3989,29 @@ void ggml_vk_preallocate_buffers_graph(ggml_tensor * node){ } } -void ggml_vk_preallocate_buffers() { - if (vk_disable) { +static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) { + if (ctx->disable) { return; } #ifdef GGML_VULKAN_DEBUG - std::cerr << "ggml_vk_preallocate_buffers()" << std::endl; - std::cerr << "qx_size: " << vk_prealloc_size_qx << " qy_size: " << vk_prealloc_size_qy << " x_size: " << vk_prealloc_size_x << " y_size: " << vk_prealloc_size_y << " split_k_size: " << vk_prealloc_size_split_k << std::endl; + std::cerr << "ggml_ctx->preallocate_buffers()" << std::endl; + std::cerr << "qx_size: " << ctx->prealloc_size_qx << " qy_size: " << 
ctx->prealloc_size_qy << " x_size: " << ctx->prealloc_size_x << " y_size: " << ctx->prealloc_size_y << " split_k_size: " << ctx->prealloc_size_split_k << std::endl; #endif #if defined(GGML_VULKAN_RUN_TESTS) - vk_staging = ggml_vk_create_buffer_check(100ul * 1024ul * 1024ul, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached); - ggml_vk_test_transfer(8192 * 1000, false); - ggml_vk_test_transfer(8192 * 1000, true); + ctx->staging = ggml_vk_create_buffer_check(ctx, 100ul * 1024ul * 1024ul, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached); + ggml_vk_test_transfer(ctx, 8192 * 1000, false); + ggml_vk_test_transfer(ctx, 8192 * 1000, true); - ggml_vk_test_dequant(2560 * 7680, GGML_TYPE_Q4_0); - ggml_vk_test_dequant(2560 * 7680, GGML_TYPE_Q4_1); - ggml_vk_test_dequant(2560 * 7680, GGML_TYPE_Q5_0); - ggml_vk_test_dequant(2560 * 7680, GGML_TYPE_Q5_1); - ggml_vk_test_dequant(2560 * 7680, GGML_TYPE_Q8_0); - ggml_vk_test_dequant(2560 * 7680, GGML_TYPE_Q2_K); - ggml_vk_test_dequant(2560 * 7680, GGML_TYPE_Q3_K); - ggml_vk_test_dequant(2560 * 7680, GGML_TYPE_Q4_K); - ggml_vk_test_dequant(2560 * 7680, GGML_TYPE_Q5_K); - ggml_vk_test_dequant(2560 * 7680, GGML_TYPE_Q6_K); + ggml_vk_test_dequant(ctx, 2560 * 7680, GGML_TYPE_Q4_0); + ggml_vk_test_dequant(ctx, 2560 * 7680, GGML_TYPE_Q4_1); + ggml_vk_test_dequant(ctx, 2560 * 7680, GGML_TYPE_Q5_0); + ggml_vk_test_dequant(ctx, 2560 * 7680, GGML_TYPE_Q5_1); + ggml_vk_test_dequant(ctx, 2560 * 7680, GGML_TYPE_Q8_0); + ggml_vk_test_dequant(ctx, 2560 * 7680, GGML_TYPE_Q2_K); + ggml_vk_test_dequant(ctx, 2560 * 7680, GGML_TYPE_Q3_K); + ggml_vk_test_dequant(ctx, 2560 * 7680, GGML_TYPE_Q4_K); + ggml_vk_test_dequant(ctx, 2560 * 7680, GGML_TYPE_Q5_K); + ggml_vk_test_dequant(ctx, 2560 * 7680, GGML_TYPE_Q6_K); const std::vector vals { 8, 8, 8, @@ -3852,76 +4038,76 @@ void ggml_vk_preallocate_buffers() 
{ }; const size_t num_it = 1; for (size_t i = 0; i < vals.size(); i += 3) { - ggml_vk_test_matmul(vals[i], vals[i + 1], vals[i + 2], 2, num_it, 1, 0); - ggml_vk_test_matmul(vals[i], vals[i + 1], vals[i + 2], 2, num_it, 1, 1); - ggml_vk_test_matmul(vals[i], vals[i + 1], vals[i + 2], 2, num_it, 1, 2); - ggml_vk_test_matmul(vals[i], vals[i + 1], vals[i + 2], 2, num_it, 4, 0); - ggml_vk_test_matmul(vals[i], vals[i + 1], vals[i + 2], 2, num_it, 4, 1); - ggml_vk_test_matmul(vals[i], vals[i + 1], vals[i + 2], 2, num_it, 4, 2); + ggml_vk_test_matmul(ctx, vals[i], vals[i + 1], vals[i + 2], 2, num_it, 1, 0); + ggml_vk_test_matmul(ctx, vals[i], vals[i + 1], vals[i + 2], 2, num_it, 1, 1); + ggml_vk_test_matmul(ctx, vals[i], vals[i + 1], vals[i + 2], 2, num_it, 1, 2); + ggml_vk_test_matmul(ctx, vals[i], vals[i + 1], vals[i + 2], 2, num_it, 4, 0); + ggml_vk_test_matmul(ctx, vals[i], vals[i + 1], vals[i + 2], 2, num_it, 4, 1); + ggml_vk_test_matmul(ctx, vals[i], vals[i + 1], vals[i + 2], 2, num_it, 4, 2); std::cerr << std::endl; } GGML_ASSERT(false); #endif - if (vk_prealloc_size_qx > 0 && vk_prealloc_qx.size < vk_prealloc_size_qx) { + if (ctx->prealloc_qx == nullptr || (ctx->prealloc_size_qx > 0 && ctx->prealloc_qx->size < ctx->prealloc_size_qx)) { // Resize buffer - if (vk_prealloc_qx.size > 0) { - ggml_vk_destroy_buffer(vk_prealloc_qx); + if (ctx->prealloc_qx != nullptr) { + ggml_vk_destroy_buffer(ctx->prealloc_qx); } - vk_prealloc_qx = ggml_vk_create_buffer_device(vk_prealloc_size_qx); + ctx->prealloc_qx = ggml_vk_create_buffer_device(ctx, ctx->prealloc_size_qx); } - if (vk_prealloc_size_qy > 0 && vk_prealloc_qy.size < vk_prealloc_size_qy) { + if (ctx->prealloc_qy == nullptr || (ctx->prealloc_size_qy > 0 && ctx->prealloc_qy->size < ctx->prealloc_size_qy)) { // Resize buffer - if (vk_prealloc_qy.size > 0) { - ggml_vk_destroy_buffer(vk_prealloc_qy); + if (ctx->prealloc_qy != nullptr) { + ggml_vk_destroy_buffer(ctx->prealloc_qy); } - vk_prealloc_qy = 
ggml_vk_create_buffer_device(vk_prealloc_size_qy); + ctx->prealloc_qy = ggml_vk_create_buffer_device(ctx, ctx->prealloc_size_qy); } - if (vk_prealloc_size_x > 0 && vk_prealloc_x.size < vk_prealloc_size_x) { + if (ctx->prealloc_x == nullptr || (ctx->prealloc_size_x > 0 && ctx->prealloc_x->size < ctx->prealloc_size_x)) { // Resize buffer - if (vk_prealloc_x.size > 0) { - ggml_vk_destroy_buffer(vk_prealloc_x); + if (ctx->prealloc_x != nullptr) { + ggml_vk_destroy_buffer(ctx->prealloc_x); } - vk_prealloc_x = ggml_vk_create_buffer_device(vk_prealloc_size_x); + ctx->prealloc_x = ggml_vk_create_buffer_device(ctx, ctx->prealloc_size_x); } - if (vk_prealloc_size_y > 0 && vk_prealloc_y.size < vk_prealloc_size_y) { + if (ctx->prealloc_y == nullptr || (ctx->prealloc_size_y > 0 && ctx->prealloc_y->size < ctx->prealloc_size_y)) { // Resize buffer - if (vk_prealloc_y.size > 0) { - ggml_vk_destroy_buffer(vk_prealloc_y); + if (ctx->prealloc_y != nullptr) { + ggml_vk_destroy_buffer(ctx->prealloc_y); } - vk_prealloc_y = ggml_vk_create_buffer_device(vk_prealloc_size_y); + ctx->prealloc_y = ggml_vk_create_buffer_device(ctx, ctx->prealloc_size_y); } - if (vk_prealloc_size_split_k > 0 && vk_prealloc_split_k.size < vk_prealloc_size_split_k) { + if (ctx->prealloc_split_k == nullptr || (ctx->prealloc_size_split_k > 0 && ctx->prealloc_split_k->size < ctx->prealloc_size_split_k)) { // Resize buffer - if (vk_prealloc_split_k.size > 0) { - ggml_vk_destroy_buffer(vk_prealloc_split_k); + if (ctx->prealloc_split_k != nullptr) { + ggml_vk_destroy_buffer(ctx->prealloc_split_k); } - vk_prealloc_split_k = ggml_vk_create_buffer_device(vk_prealloc_size_split_k); + ctx->prealloc_split_k = ggml_vk_create_buffer_device(ctx, ctx->prealloc_size_split_k); } - if (vk_staging_size > 0 && vk_staging.size < vk_staging_size) { + if (ctx->staging == nullptr || (ctx->staging_size > 0 && ctx->staging->size < ctx->staging_size)) { // Resize buffer - if (vk_staging.size > 0) { - ggml_vk_destroy_buffer(vk_staging); + if 
(ctx->staging != nullptr) { + ggml_vk_destroy_buffer(ctx->staging); } - vk_staging = ggml_vk_create_buffer_check(vk_staging_size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached); + ctx->staging = ggml_vk_create_buffer_check(ctx, ctx->staging_size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached); } } -void ggml_vk_build_graph(ggml_tensor * node, bool last_node){ +static void ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * node, bool last_node){ const bool any_on_device = node->backend == GGML_BACKEND_GPU || (node->src[0] != nullptr && (node->src[0]->backend == GGML_BACKEND_GPU || node->src[0]->backend == GGML_BACKEND_GPU_SPLIT)) || (node->src[1] != nullptr && node->src[1]->backend == GGML_BACKEND_GPU); - if (vk_disable || (!any_on_device && node->op != GGML_OP_MUL_MAT) || (node->op == GGML_OP_MUL_MAT && !any_on_device && !ggml_vk_can_mul_mat(node->src[0], node->src[1], node))) { + if (ctx->disable || (!any_on_device && node->op != GGML_OP_MUL_MAT) || (node->op == GGML_OP_MUL_MAT && !any_on_device && !ggml_vk_can_mul_mat(node->src[0], node->src[1], node))) { return; } #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_build_graph(" << node << ", " << ggml_op_name(node->op) << ")" << std::endl; #endif - vk_semaphore_idx = 0; - vk_staging_offset = 0; + ctx->semaphore_idx = 0; + ctx->staging_offset = 0; const ggml_tensor * src0 = node->src[0]; const ggml_tensor * src1 = node->src[1]; @@ -3969,44 +4155,44 @@ void ggml_vk_build_graph(ggml_tensor * node, bool last_node){ return; } - if (vk_ctx == nullptr) { - vk_ctx = ggml_vk_create_context(vk_device.compute_queue); - ggml_vk_ctx_begin(vk_ctx); + if (ctx->compute_ctx == nullptr) { + ctx->compute_ctx = ggml_vk_create_context(ctx, ctx->device.lock()->compute_queue); + ggml_vk_ctx_begin(ctx, ctx->compute_ctx); } switch (node->op) { case GGML_OP_REPEAT: - 
ggml_vk_repeat(vk_ctx, src0, src1, node); + ggml_vk_repeat(ctx, ctx->compute_ctx, src0, src1, node); break; case GGML_OP_GET_ROWS: - ggml_vk_get_rows(vk_ctx, src0, src1, node); + ggml_vk_get_rows(ctx, ctx->compute_ctx, src0, src1, node); break; case GGML_OP_ADD: - ggml_vk_add(vk_ctx, src0, src1, node); + ggml_vk_add(ctx, ctx->compute_ctx, src0, src1, node); break; case GGML_OP_MUL: - ggml_vk_mul(vk_ctx, src0, src1, node); + ggml_vk_mul(ctx, ctx->compute_ctx, src0, src1, node); break; case GGML_OP_SCALE: - ggml_vk_scale(vk_ctx, src0, node); + ggml_vk_scale(ctx, ctx->compute_ctx, src0, node); break; case GGML_OP_SQR: - ggml_vk_sqr(vk_ctx, src0, node); + ggml_vk_sqr(ctx, ctx->compute_ctx, src0, node); break; case GGML_OP_CLAMP: - ggml_vk_clamp(vk_ctx, src0, node); + ggml_vk_clamp(ctx, ctx->compute_ctx, src0, node); break; case GGML_OP_CPY: case GGML_OP_CONT: case GGML_OP_DUP: - ggml_vk_cpy(vk_ctx, src0, node); + ggml_vk_cpy(ctx, ctx->compute_ctx, src0, node); break; case GGML_OP_RESHAPE: @@ -4014,15 +4200,15 @@ void ggml_vk_build_graph(ggml_tensor * node, bool last_node){ case GGML_OP_PERMUTE: case GGML_OP_TRANSPOSE: case GGML_OP_NONE: - ggml_vk_nop(vk_ctx, src0, node); + ggml_vk_nop(ctx, ctx->compute_ctx, src0, node); break; case GGML_OP_NORM: - ggml_vk_norm(vk_ctx, src0, node); + ggml_vk_norm(ctx, ctx->compute_ctx, src0, node); break; case GGML_OP_RMS_NORM: - ggml_vk_rms_norm(vk_ctx, src0, node); + ggml_vk_rms_norm(ctx, ctx->compute_ctx, src0, node); break; case GGML_OP_UNARY: @@ -4030,26 +4216,26 @@ void ggml_vk_build_graph(ggml_tensor * node, bool last_node){ case GGML_UNARY_OP_SILU: case GGML_UNARY_OP_GELU: case GGML_UNARY_OP_RELU: - ggml_vk_unary(vk_ctx, src0, node); + ggml_vk_unary(ctx, ctx->compute_ctx, src0, node); break; default: return; } break; case GGML_OP_DIAG_MASK_INF: - ggml_vk_diag_mask_inf(vk_ctx, src0, node); + ggml_vk_diag_mask_inf(ctx, ctx->compute_ctx, src0, node); break; case GGML_OP_SOFT_MAX: - ggml_vk_soft_max(vk_ctx, src0, src1, node); + 
ggml_vk_soft_max(ctx, ctx->compute_ctx, src0, src1, node); break; case GGML_OP_ROPE: - ggml_vk_rope(vk_ctx, src0, src1, node); + ggml_vk_rope(ctx, ctx->compute_ctx, src0, src1, node); break; case GGML_OP_MUL_MAT: - ggml_vk_mul_mat(vk_ctx, src0, src1, node); + ggml_vk_mul_mat(ctx, ctx->compute_ctx, src0, src1, node); break; default: @@ -4057,7 +4243,7 @@ void ggml_vk_build_graph(ggml_tensor * node, bool last_node){ } extra->ready = true; - extra->ctx_idx = vk_ctx->idx; + extra->ctx_idx = ctx->compute_ctx->idx; #ifdef GGML_VULKAN_CHECK_RESULTS // Force context reset on each node so that each tensor ends up in its own context @@ -4066,18 +4252,18 @@ void ggml_vk_build_graph(ggml_tensor * node, bool last_node){ #endif if (node->backend == GGML_BACKEND_CPU || last_node) { - ggml_vk_ctx_end(vk_ctx); - vk_ctx->exit_tensor = node; - vk_ctx = nullptr; + ggml_vk_ctx_end(ctx->compute_ctx); + ctx->compute_ctx->exit_tensor = node; + ctx->compute_ctx = nullptr; } } -bool ggml_vk_compute_forward(ggml_compute_params * params, ggml_tensor * tensor){ +static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_compute_params * params, ggml_tensor * tensor){ const bool any_on_device = tensor->backend == GGML_BACKEND_GPU || (tensor->src[0] != nullptr && (tensor->src[0]->backend == GGML_BACKEND_GPU || tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT)) || (tensor->src[1] != nullptr && tensor->src[1]->backend == GGML_BACKEND_GPU); - if (vk_disable || (!any_on_device && tensor->op != GGML_OP_MUL_MAT)) { + if (ctx->disable || (!any_on_device && tensor->op != GGML_OP_MUL_MAT)) { return false; } @@ -4145,33 +4331,33 @@ bool ggml_vk_compute_forward(ggml_compute_params * params, ggml_tensor * tensor) #endif #ifdef GGML_VULKAN_CHECK_RESULTS - ggml_vk_check_results_0(params, tensor); + ggml_vk_check_results_0(ctx, params, tensor); #endif GGML_ASSERT(extra->ready); - vk_context& ctx = vk_gc.contexts[extra->ctx_idx]; + vk_context& subctx = ctx->gc.contexts[extra->ctx_idx]; // Only run 
if ctx hasn't been submitted yet - if (!ctx.seqs.empty()) { + if (!subctx.seqs.empty()) { // Do staging buffer copies - for (auto& cpy : ctx.in_memcpys) { + for (auto& cpy : subctx.in_memcpys) { memcpy(cpy.dst, cpy.src, cpy.n); } - ggml_vk_submit(&ctx, vk_fence); + ggml_vk_submit(&subctx, ctx->fence); } - if (tensor == ctx.exit_tensor) { - VK_CHECK(vk_device.device.waitForFences({ vk_fence }, true, UINT64_MAX), "ggml_vk_compute_forward waitForFences"); - vk_device.device.resetFences({ vk_fence }); + if (tensor == subctx.exit_tensor) { + VK_CHECK(ctx->device.lock()->device.waitForFences({ ctx->fence }, true, UINT64_MAX), "ggml_vk_compute_forward waitForFences"); + ctx->device.lock()->device.resetFences({ ctx->fence }); // Do staging buffer copies - for (auto& cpy : ctx.out_memcpys) { + for (auto& cpy : subctx.out_memcpys) { memcpy(cpy.dst, cpy.src, cpy.n); } - ctx.in_memcpys.clear(); - ctx.out_memcpys.clear(); + subctx.in_memcpys.clear(); + subctx.out_memcpys.clear(); } extra->ready = false; @@ -4179,90 +4365,204 @@ bool ggml_vk_compute_forward(ggml_compute_params * params, ggml_tensor * tensor) return true; } -void ggml_vk_graph_cleanup() { - if (vk_disable) { +// Clean up after graph processing is done +static void ggml_vk_graph_cleanup(ggml_backend_vk_context * ctx) { + if (ctx->disable) { return; } #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_graph_cleanup()" << std::endl; #endif - for (auto& buffer : vk_gc.temp_buffers) { - ggml_vk_pool_free(buffer); + for (auto& buffer : ctx->gc.temp_buffers) { + ggml_vk_pool_free(ctx, buffer); } - vk_gc.temp_buffers.clear(); + ctx->gc.temp_buffers.clear(); - for (auto * pipeline : vk_gc.pipelines) { - ggml_vk_pipeline_cleanup(*pipeline); - } - vk_gc.pipelines.clear(); - - ggml_vk_queue_cleanup(vk_device.compute_queue); - ggml_vk_queue_cleanup(vk_device.transfer_queue); - - for (size_t i = 0; i < vk_gc.semaphores.size(); i++) { - vk_device.device.destroySemaphore({ vk_gc.semaphores[i].s }); - } - vk_gc.semaphores.clear(); - 
- for (size_t i = 0; i < vk_gc.tl_semaphores.size(); i++) { - vk_device.device.destroySemaphore({ vk_gc.tl_semaphores[i].s }); - } - vk_gc.tl_semaphores.clear(); - - vk_event_idx = 0; - - for (auto& event : vk_gc.events) { - vk_device.device.resetEvent(event); + for (auto * pipeline : ctx->gc.pipelines) { + ggml_pipeline_cleanup(*pipeline); } - vk_staging_offset = 0; + ggml_vk_queue_cleanup(ctx, ctx->device.lock()->compute_queue); + ggml_vk_queue_cleanup(ctx, ctx->device.lock()->transfer_queue); - vk_ctx = nullptr; - vk_gc.contexts.clear(); + for (size_t i = 0; i < ctx->gc.semaphores.size(); i++) { + ctx->device.lock()->device.destroySemaphore({ ctx->gc.semaphores[i].s }); + } + ctx->gc.semaphores.clear(); + + for (size_t i = 0; i < ctx->gc.tl_semaphores.size(); i++) { + ctx->device.lock()->device.destroySemaphore({ ctx->gc.tl_semaphores[i].s }); + } + ctx->gc.tl_semaphores.clear(); + ctx->semaphore_idx = 0; + + ctx->event_idx = 0; + + for (auto& event : ctx->gc.events) { + ctx->device.lock()->device.resetEvent(event); + } + + ctx->staging_offset = 0; + + ctx->compute_ctx = nullptr; + ctx->transfer_ctx = nullptr; + ctx->gc.contexts.clear(); } -static void ggml_vk_cleanup() { +// Clean up on backend free +static void ggml_vk_cleanup(ggml_backend_vk_context * ctx) { #ifdef GGML_VULKAN_DEBUG - std::cerr << "ggml_vk_cleanup()" << std::endl; + std::cerr << "ggml_vk_cleanup(" << ctx->idx << ")" << std::endl; #endif - ggml_vk_destroy_buffer(vk_prealloc_x); - ggml_vk_destroy_buffer(vk_prealloc_y); - ggml_vk_destroy_buffer(vk_prealloc_split_k); - ggml_vk_destroy_buffer(vk_staging); - ggml_vk_destroy_buffer(vk_sync_staging); + ggml_vk_graph_cleanup(ctx); - vk_prealloc_size_x = 0; - vk_prealloc_size_y = 0; - vk_prealloc_size_split_k = 0; - vk_staging_size = 0; + ggml_vk_destroy_buffer(ctx->prealloc_qx); + ggml_vk_destroy_buffer(ctx->prealloc_qy); + ggml_vk_destroy_buffer(ctx->prealloc_x); + ggml_vk_destroy_buffer(ctx->prealloc_y); + 
ggml_vk_destroy_buffer(ctx->prealloc_split_k); + ggml_vk_destroy_buffer(ctx->staging); + ggml_vk_destroy_buffer(ctx->sync_staging); - for (auto& event : vk_gc.events) { - vk_device.device.destroyEvent(event); + for (auto& buffer : ctx->buffer_pool) { + ggml_vk_destroy_buffer(buffer); } - vk_gc.events.clear(); + + ctx->prealloc_size_qx = 0; + ctx->prealloc_size_qy = 0; + ctx->prealloc_size_x = 0; + ctx->prealloc_size_y = 0; + ctx->prealloc_size_split_k = 0; + ctx->staging_size = 0; + + for (auto& event : ctx->gc.events) { + ctx->device.lock()->device.destroyEvent(event); + } + ctx->gc.events.clear(); + + for (auto* pipeline : ctx->gc.pipelines) { + ggml_vk_destroy_pipeline(ctx, pipeline); + } + ctx->gc.pipelines.clear(); + + ctx->device.lock()->device.destroyFence(ctx->fence); + + ctx->device.lock()->device.destroyCommandPool(ctx->device.lock()->compute_queue.pool); + if (!ctx->device.lock()->single_queue) { + ctx->device.lock()->device.destroyCommandPool(ctx->device.lock()->transfer_queue.pool); + } +} + +GGML_CALL int ggml_vk_get_device_count() { + ggml_vk_instance_init(); + + return vk_instance.device_indices.size(); +} + +GGML_CALL void ggml_vk_get_device_description(int device, char * description, size_t description_size) { + ggml_vk_instance_init(); + + std::vector devices = vk_instance.instance.enumeratePhysicalDevices(); + + vk::PhysicalDeviceProperties props; + devices[device].getProperties(&props); + + snprintf(description, description_size, "%s", props.deviceName.data()); +} + +// CPU assist interface + +void ggml_vk_init_cpu_assist() { + ggml_vk_instance_init(); + + std::cerr << "ggml_vulkan: Found " << ggml_vk_get_device_count() << " Vulkan devices:" << std::endl; + + for (size_t i = 0; i < ggml_vk_get_device_count(); i++) { + ggml_vk_print_gpu_info(i); + } + // Initialize the first backend to make sure CPU matrix multiplications can be offloaded. 
+ ggml_backend_vk_init(0); +} + +void ggml_vk_preallocate_buffers_graph_cpu_assist(ggml_tensor * node) { + ggml_backend_vk_context * ctx = &vk_instance.contexts[0]; + + if (!ctx->initialized) { + return; + } + + ggml_vk_preallocate_buffers_graph(ctx, node); +} + +void ggml_vk_preallocate_buffers_cpu_assist() { + ggml_backend_vk_context * ctx = &vk_instance.contexts[0]; + + if (!ctx->initialized) { + return; + } + + ggml_vk_preallocate_buffers(ctx); +} + +void ggml_vk_build_graph_cpu_assist(ggml_tensor * node, bool last_node) { + ggml_backend_vk_context * ctx = &vk_instance.contexts[0]; + + if (!ctx->initialized) { + return; + } + + ggml_vk_build_graph(ctx, node, last_node); +} + +bool ggml_vk_compute_forward_cpu_assist(ggml_compute_params * params, ggml_tensor * tensor){ + ggml_backend_vk_context * ctx = &vk_instance.contexts[0]; + + if (!ctx->initialized) { + return false; + } + + return ggml_vk_compute_forward(ctx, params, tensor); +} + +void ggml_vk_graph_cleanup_cpu_assist() { + ggml_backend_vk_context * ctx = &vk_instance.contexts[0]; + + if (!ctx->initialized) { + return; + } + + ggml_vk_graph_cleanup(ctx); +} + +void ggml_vk_free_cpu_assist() { + ggml_backend_vk_context * ctx = &vk_instance.contexts[0]; + + if (!ctx->initialized || vk_instance.backends[0] == nullptr) { + return; + } + + ggml_backend_vk_free(vk_instance.backends[0]); } // backend interface #define UNUSED GGML_UNUSED -struct ggml_backend_vk_context { - std::string name; -}; - // device backend static void * const vk_ptr_base = (void *)(uintptr_t) 0x1000; // NOLINT struct ggml_backend_vk_buffer_context { + ggml_backend_vk_context * ctx; vk_buffer dev_buffer; ggml_tensor_extra_gpu * temp_tensor_extras = nullptr; size_t temp_tensor_extra_index = 0; std::string name; - ggml_backend_vk_buffer_context(vk_buffer dev_buffer) : + ggml_backend_vk_buffer_context(ggml_backend_vk_context * ctx, vk_buffer&& dev_buffer, std::string& name) : + ctx(ctx), dev_buffer(dev_buffer), - name(GGML_VK_NAME) { + 
name(name) { } ~ggml_backend_vk_buffer_context() { @@ -4294,6 +4594,9 @@ GGML_CALL static bool ggml_backend_buffer_is_vk(ggml_backend_buffer_t buffer) { } GGML_CALL static void ggml_backend_vk_buffer_free_buffer(ggml_backend_buffer_t buffer) { +#ifdef GGML_VULKAN_DEBUG + std::cerr << "ggml_backend_vk_buffer_free_buffer()" << std::endl; +#endif ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context; ggml_vk_destroy_buffer(ctx->dev_buffer); delete ctx; @@ -4313,6 +4616,7 @@ GGML_CALL static void ggml_backend_vk_buffer_init_tensor(ggml_backend_buffer_t b ggml_tensor_extra_gpu * extra = ctx->ggml_vk_alloc_temp_tensor_extra(); if (tensor->view_src != nullptr && tensor->view_src->extra != nullptr) { + GGML_ASSERT(tensor->view_src->buffer->buft == buffer->buft); ggml_tensor_extra_gpu * extra_view = (ggml_tensor_extra_gpu *) tensor->view_src->extra; extra->buffer_gpu = extra_view->buffer_gpu; extra->offset = extra_view->offset + tensor->view_offs; @@ -4331,11 +4635,13 @@ GGML_CALL static void ggml_backend_vk_buffer_set_tensor(ggml_backend_buffer_t bu #endif GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU); + ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context; + ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra; - ggml_vk_buffer_write(&extra->buffer_gpu, extra->offset + offset, data, size); + vk_buffer buf = extra->buffer_gpu.lock(); - UNUSED(buffer); + ggml_vk_buffer_write(ctx->ctx, buf, extra->offset + offset, data, size); } GGML_CALL static void ggml_backend_vk_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) { @@ -4344,31 +4650,35 @@ GGML_CALL static void ggml_backend_vk_buffer_get_tensor(ggml_backend_buffer_t bu #endif GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU); + ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context; + ggml_tensor_extra_gpu * extra = 
(ggml_tensor_extra_gpu *) tensor->extra; - ggml_vk_buffer_read(&extra->buffer_gpu, extra->offset + offset, data, size); + vk_buffer buf = extra->buffer_gpu.lock(); - UNUSED(buffer); + ggml_vk_buffer_read(ctx->ctx, buf, extra->offset + offset, data, size); } GGML_CALL static bool ggml_backend_vk_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * src, ggml_tensor * dst) { if (ggml_backend_buffer_is_vk(src->buffer)) { + ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context; ggml_tensor_extra_gpu * src_extra = (ggml_tensor_extra_gpu *) src->extra; ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra; - ggml_vk_buffer_copy(&src_extra->buffer_gpu, src_extra->offset, &dst_extra->buffer_gpu, dst_extra->offset, ggml_nbytes(src)); + vk_buffer src_buf = src_extra->buffer_gpu.lock(); + vk_buffer dst_buf = dst_extra->buffer_gpu.lock(); + + ggml_vk_buffer_copy(dst_buf, dst_extra->offset, src_buf, src_extra->offset, ggml_nbytes(src)); return true; } return false; - - UNUSED(buffer); } GGML_CALL static void ggml_backend_vk_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) { ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context; - ggml_vk_buffer_memset(&ctx->dev_buffer, 0, value, buffer->size); + ggml_vk_buffer_memset(ctx->ctx, ctx->dev_buffer, 0, value, buffer->size); } static ggml_backend_buffer_i ggml_backend_vk_buffer_interface = { @@ -4386,6 +4696,7 @@ static ggml_backend_buffer_i ggml_backend_vk_buffer_interface = { // vk buffer type struct ggml_backend_vk_buffer_type_context { std::string name; + ggml_backend_vk_context * ctx; }; GGML_CALL static const char * ggml_backend_vk_buffer_type_name(ggml_backend_buffer_type_t buft) { @@ -4398,25 +4709,22 @@ GGML_CALL static ggml_backend_buffer_t ggml_backend_vk_buffer_type_alloc_buffer( #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_backend_vk_buffer_type_alloc_buffer(" << size << ")" << std::endl; #endif - vk_buffer 
dev_buffer = ggml_vk_create_buffer_device(size); + ggml_backend_vk_buffer_type_context * ctx = (ggml_backend_vk_buffer_type_context *) buft->context; + vk_buffer dev_buffer = ggml_vk_create_buffer_device(ctx->ctx, size); - ggml_backend_vk_buffer_context * ctx = new ggml_backend_vk_buffer_context(dev_buffer); + ggml_backend_vk_buffer_context * bufctx = new ggml_backend_vk_buffer_context(ctx->ctx, std::move(dev_buffer), ctx->name); - return ggml_backend_buffer_init(buft, ggml_backend_vk_buffer_interface, ctx, size); - - UNUSED(buft); + return ggml_backend_buffer_init(buft, ggml_backend_vk_buffer_interface, bufctx, size); } GGML_CALL static size_t ggml_backend_vk_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) { - return vk_device.properties.limits.minStorageBufferOffsetAlignment; - - UNUSED(buft); + ggml_backend_vk_buffer_type_context * ctx = (ggml_backend_vk_buffer_type_context *) buft->context; + return ctx->ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment; } GGML_CALL static size_t ggml_backend_vk_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) { - return vk_device.max_memory_allocation_size; - - UNUSED(buft); + ggml_backend_vk_buffer_type_context * ctx = (ggml_backend_vk_buffer_type_context *) buft->context; + return ctx->ctx->device.lock()->max_memory_allocation_size; } GGML_CALL static size_t ggml_backend_vk_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) { @@ -4426,9 +4734,14 @@ GGML_CALL static size_t ggml_backend_vk_buffer_type_get_alloc_size(ggml_backend_ } GGML_CALL static bool ggml_backend_vk_buffer_type_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend) { - return ggml_backend_is_vk(backend); + if (!ggml_backend_is_vk(backend)) { + return false; + } - UNUSED(buft); + ggml_backend_vk_buffer_type_context * buft_ctx = (ggml_backend_vk_buffer_type_context *)buft->context; + ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context; + + 
return buft_ctx->ctx->idx == ctx->idx; } static ggml_backend_buffer_type_i ggml_backend_vk_buffer_type_interface = { @@ -4441,20 +4754,16 @@ static ggml_backend_buffer_type_i ggml_backend_vk_buffer_type_interface = { /* .is_host = */ NULL, }; -GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_buffer_type() { - static ggml_backend_buffer_type ggml_backend_vk_buffer_type; +GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t idx) { +#ifdef GGML_VULKAN_DEBUG + std::cerr << "ggml_backend_vk_buffer_type(" << idx << ")" << std::endl; +#endif - static bool ggml_backend_vk_buffer_type_initialized = false; + GGML_ASSERT(idx < vk_instance.device_indices.size()); - if (!ggml_backend_vk_buffer_type_initialized) { - ggml_backend_vk_buffer_type = { - /* .iface = */ ggml_backend_vk_buffer_type_interface, - /* .context = */ new ggml_backend_vk_buffer_type_context{GGML_VK_NAME}, - }; - ggml_backend_vk_buffer_type_initialized = true; - } + ggml_backend_vk_init(idx); - return &ggml_backend_vk_buffer_type; + return &vk_instance.buffer_types[idx]; } // host buffer type @@ -4472,13 +4781,19 @@ GGML_CALL static const char * ggml_backend_vk_host_buffer_name(ggml_backend_buff } GGML_CALL static void ggml_backend_vk_host_buffer_free_buffer(ggml_backend_buffer_t buffer) { - ggml_vk_host_free(buffer->context); +#ifdef GGML_VULKAN_DEBUG + std::cerr << "ggml_backend_vk_host_buffer_free_buffer()" << std::endl; +#endif + ggml_vk_host_free(&vk_instance.contexts[0], buffer->context); } GGML_CALL static ggml_backend_buffer_t ggml_backend_vk_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) { +#ifdef GGML_VULKAN_DEBUG + std::cerr << "ggml_backend_vk_host_buffer_type_alloc_buffer(" << size << ")" << std::endl; +#endif void * ptr = nullptr; try { - ptr = ggml_vk_host_malloc(size); + ptr = ggml_vk_host_malloc(&vk_instance.contexts[0], size); } catch (vk::SystemError& e) { std::cerr << "ggml_vulkan: Failed to allocate pinned memory." 
<< std::endl; std::cerr << "ggml_vulkan: " << e.what() << std::endl; @@ -4495,7 +4810,7 @@ GGML_CALL static ggml_backend_buffer_t ggml_backend_vk_host_buffer_type_alloc_bu } GGML_CALL static size_t ggml_backend_vk_host_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) { - return vk_device.properties.limits.minMemoryMapAlignment; + return vk_instance.contexts[0].device.lock()->properties.limits.minMemoryMapAlignment; UNUSED(buft); } @@ -4514,127 +4829,150 @@ GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() { /* .context = */ nullptr, }; + if (!vk_instance.contexts[0].initialized) { + // Fall back to CPU + return ggml_backend_cpu_buffer_type(); + } + return &ggml_backend_vk_buffer_type_host; } // backend GGML_CALL static const char * ggml_backend_vk_name(ggml_backend_t backend) { - ggml_backend_vk_context * vk_ctx = (ggml_backend_vk_context *)backend->context; + ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context; - return vk_ctx->name.c_str(); + return ctx->name.c_str(); } GGML_CALL static void ggml_backend_vk_free(ggml_backend_t backend) { - ggml_backend_vk_context * vk_ctx = (ggml_backend_vk_context *)backend->context; + ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context; +#ifdef GGML_VULKAN_DEBUG + std::cerr << "ggml_backend_vk_free(" << ctx->name << ")" << std::endl; +#endif - delete vk_ctx; + size_t idx = ctx->idx; + + ggml_vk_cleanup(ctx); + + // Release device + vk_instance.devices[ctx->idx].reset(); + ctx->initialized = false; + + vk_instance.initialized[idx] = false; + vk_instance.backends[idx] = nullptr; + memset(&vk_instance.buffer_types[idx], 0, sizeof(ggml_backend_buffer_type)); delete backend; } GGML_CALL static ggml_backend_buffer_type_t ggml_backend_vk_get_default_buffer_type(ggml_backend_t backend) { - return ggml_backend_vk_buffer_type(); + ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context; - UNUSED(backend); + GGML_ASSERT(ctx->initialized); + 
+ return ggml_backend_vk_buffer_type(ctx->idx); } GGML_CALL static void ggml_backend_vk_set_tensor_async(ggml_backend_t backend, ggml_tensor * tensor, const void * data, size_t offset, size_t size) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_backend_vk_set_tensor_async(" << size << ")" << std::endl; #endif - GGML_ASSERT((tensor->buffer->buft == ggml_backend_vk_buffer_type() || tensor->buffer->buft == ggml_backend_vk_host_buffer_type()) && "unsupported buffer type"); + ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context; + GGML_ASSERT((tensor->buffer->buft == ggml_backend_vk_buffer_type(ctx->idx) || tensor->buffer->buft == ggml_backend_vk_host_buffer_type()) && "unsupported buffer type"); GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU); ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra; - if (vk_transfer_ctx == nullptr) { + if (ctx->transfer_ctx == nullptr) { // Initialize new transfer context - vk_transfer_ctx = ggml_vk_create_context(vk_device.transfer_queue); - ggml_vk_ctx_begin(vk_transfer_ctx); + ctx->transfer_ctx = ggml_vk_create_context(ctx, ctx->device.lock()->transfer_queue); + ggml_vk_ctx_begin(ctx, ctx->transfer_ctx); } - ggml_vk_buffer_write_async(vk_transfer_ctx, &extra->buffer_gpu, extra->offset + offset, data, size); + vk_buffer buf = extra->buffer_gpu.lock(); - UNUSED(backend); + ggml_vk_buffer_write_async(ctx, ctx->transfer_ctx, buf, extra->offset + offset, data, size); } GGML_CALL static void ggml_backend_vk_get_tensor_async(ggml_backend_t backend, const ggml_tensor * tensor, void * data, size_t offset, size_t size) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_backend_vk_get_tensor_async(" << size << ")" << std::endl; #endif - GGML_ASSERT((tensor->buffer->buft == ggml_backend_vk_buffer_type() || tensor->buffer->buft == ggml_backend_vk_host_buffer_type()) && "unsupported buffer type"); + ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context; + GGML_ASSERT((tensor->buffer->buft 
== ggml_backend_vk_buffer_type(ctx->idx) || tensor->buffer->buft == ggml_backend_vk_host_buffer_type()) && "unsupported buffer type"); GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU); ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra; - if (vk_transfer_ctx == nullptr) { + if (ctx->transfer_ctx == nullptr) { // Initialize new transfer context - vk_transfer_ctx = ggml_vk_create_context(vk_device.transfer_queue); - ggml_vk_ctx_begin(vk_transfer_ctx); + ctx->transfer_ctx = ggml_vk_create_context(ctx, ctx->device.lock()->transfer_queue); + ggml_vk_ctx_begin(ctx, ctx->transfer_ctx); } - ggml_vk_buffer_read_async(vk_transfer_ctx, &extra->buffer_gpu, extra->offset + offset, data, size); + vk_buffer buf = extra->buffer_gpu.lock(); - UNUSED(backend); + ggml_vk_buffer_read_async(ctx, ctx->transfer_ctx, buf, extra->offset + offset, data, size); } GGML_CALL static bool ggml_backend_vk_cpy_tensor_async(ggml_backend_t backend, const ggml_tensor * src, ggml_tensor * dst) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_backend_vk_cpy_tensor_async()" << std::endl; #endif - if ((dst->buffer->buft == ggml_backend_vk_buffer_type() || dst->buffer->buft == ggml_backend_vk_host_buffer_type()) && ggml_backend_buffer_is_vk(src->buffer)) { + ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context; + if ((dst->buffer->buft == ggml_backend_vk_buffer_type(ctx->idx) || dst->buffer->buft == ggml_backend_vk_host_buffer_type()) && ggml_backend_buffer_is_vk(src->buffer)) { ggml_tensor_extra_gpu * src_extra = (ggml_tensor_extra_gpu *) src->extra; ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra; - if (vk_transfer_ctx == nullptr) { + if (ctx->transfer_ctx == nullptr) { // Initialize new transfer context - vk_transfer_ctx = ggml_vk_create_context(vk_device.transfer_queue); - ggml_vk_ctx_begin(vk_transfer_ctx); + ctx->transfer_ctx = ggml_vk_create_context(ctx, ctx->device.lock()->transfer_queue); + ggml_vk_ctx_begin(ctx, 
ctx->transfer_ctx); } - ggml_vk_buffer_copy_async(vk_transfer_ctx, &src_extra->buffer_gpu, src_extra->offset, &dst_extra->buffer_gpu, dst_extra->offset, ggml_nbytes(src)); + vk_buffer src_buf = src_extra->buffer_gpu.lock(); + vk_buffer dst_buf = dst_extra->buffer_gpu.lock(); + + ggml_vk_buffer_copy_async(ctx->transfer_ctx, src_buf, src_extra->offset, dst_buf, dst_extra->offset, ggml_nbytes(src)); return true; } return false; - - UNUSED(backend); } GGML_CALL static void ggml_backend_vk_synchronize(ggml_backend_t backend) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_backend_vk_synchronize()" << std::endl; #endif - if(vk_transfer_ctx == nullptr) { + ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context; + if(ctx->transfer_ctx == nullptr) { return; } - ggml_vk_ctx_end(vk_transfer_ctx); + ggml_vk_ctx_end(ctx->transfer_ctx); - for (auto& cpy : vk_transfer_ctx->in_memcpys) { + for (auto& cpy : ctx->transfer_ctx->in_memcpys) { memcpy(cpy.dst, cpy.src, cpy.n); } - ggml_vk_submit(vk_transfer_ctx, vk_fence); - VK_CHECK(vk_device.device.waitForFences({ vk_fence }, true, UINT64_MAX), "ggml_backend_vk_synchronize waitForFences"); - vk_device.device.resetFences({ vk_fence }); + ggml_vk_submit(ctx->transfer_ctx, ctx->fence); + VK_CHECK(ctx->device.lock()->device.waitForFences({ ctx->fence }, true, UINT64_MAX), "ggml_backend_vk_synchronize waitForFences"); + ctx->device.lock()->device.resetFences({ ctx->fence }); - for (auto& cpy : vk_transfer_ctx->out_memcpys) { + for (auto& cpy : ctx->transfer_ctx->out_memcpys) { memcpy(cpy.dst, cpy.src, cpy.n); } - vk_transfer_ctx = nullptr; - - UNUSED(backend); + ctx->transfer_ctx = nullptr; } GGML_CALL static bool ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) { - // ggml_backend_vk_context * vk_ctx = (ggml_backend_vk_context *)backend->context; + ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context; for (int i = 0; i < cgraph->n_nodes; i++) { - 
ggml_vk_preallocate_buffers_graph(cgraph->nodes[i]); + ggml_vk_preallocate_buffers_graph(ctx, cgraph->nodes[i]); } - ggml_vk_preallocate_buffers(); + ggml_vk_preallocate_buffers(ctx); int last_node = cgraph->n_nodes - 1; @@ -4644,7 +4982,7 @@ GGML_CALL static bool ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml } for (int i = 0; i < cgraph->n_nodes; i++) { - ggml_vk_build_graph(cgraph->nodes[i], i == last_node); + ggml_vk_build_graph(ctx,cgraph->nodes[i], i == last_node); } ggml_compute_params params = {}; @@ -4657,19 +4995,19 @@ GGML_CALL static bool ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml continue; } - bool ok = ggml_vk_compute_forward(¶ms, node); + bool ok = ggml_vk_compute_forward(ctx, ¶ms, node); if (!ok) { fprintf(stderr, "%s: error: op not supported %s (%s)\n", __func__, node->name, ggml_op_name(node->op)); } #ifdef GGML_VULKAN_CHECK_RESULTS else { - ggml_vk_check_results_1(¶ms, node); + ggml_vk_check_results_1(ctx, ¶ms, node); } #endif GGML_ASSERT(ok); } - ggml_vk_graph_cleanup(); + ggml_vk_graph_cleanup(ctx); return true; @@ -4734,7 +5072,7 @@ GGML_CALL static bool ggml_backend_vk_supports_op(ggml_backend_t backend, const } return false; } break; - // case GGML_OP_DUP: + case GGML_OP_DUP: // case GGML_OP_REPEAT: // { // ggml_type src0_type = op->src[0]->type; @@ -4786,18 +5124,30 @@ static ggml_backend_i ggml_backend_vk_interface = { /* .supports_op = */ ggml_backend_vk_supports_op, }; -GGML_CALL ggml_backend_t ggml_backend_vk_init() { - ggml_vk_init(); // TODO: remove from ggml.c +GGML_CALL ggml_backend_t ggml_backend_vk_init(size_t idx) { + if (vk_instance.initialized[idx]) { + return vk_instance.backends[idx]; + } +#ifdef GGML_VULKAN_DEBUG + std::cerr << "ggml_backend_vk_init(" << idx << ")" << std::endl; +#endif - ggml_backend_vk_context * ctx = new ggml_backend_vk_context { - /* .name = */ GGML_VK_NAME, + ggml_backend_vk_context * ctx = &vk_instance.contexts[idx]; + ggml_vk_init(ctx, idx); + ctx->name = GGML_VK_NAME + 
std::to_string(idx); + vk_instance.buffer_types[idx] = { + /* .iface = */ ggml_backend_vk_buffer_type_interface, + /* .context = */ new ggml_backend_vk_buffer_type_context{ ctx->name, ctx }, }; + vk_instance.initialized[idx] = true; ggml_backend_t vk_backend = new ggml_backend { /* .interface = */ ggml_backend_vk_interface, - /* .context = */ ctx + /* .context = */ &vk_instance.contexts[ctx->idx], }; + vk_instance.backends[idx] = vk_backend; + return vk_backend; } @@ -4805,20 +5155,47 @@ GGML_CALL bool ggml_backend_is_vk(ggml_backend_t backend) { return backend && backend->iface.get_name == ggml_backend_vk_name; } +GGML_CALL int ggml_backend_vk_get_device_count() { + return ggml_vk_get_device_count(); +} + +GGML_CALL void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size) { + ggml_vk_get_device_description(device, description, description_size); +} + +GGML_CALL void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total) { + GGML_ASSERT(device < vk_instance.device_indices.size()); + + vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[device]]; + + vk::PhysicalDeviceMemoryProperties memprops = vkdev.getMemoryProperties(); + + for (const vk::MemoryHeap& heap : memprops.memoryHeaps) { + if (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal) { + *total = heap.size; + *free = heap.size; + break; + } + } +} + // backend registry GGML_CALL static ggml_backend_t ggml_backend_reg_vk_init(const char * params, void * user_data) { - ggml_backend_t vk_backend = ggml_backend_vk_init(); + ggml_backend_t vk_backend = ggml_backend_vk_init((int) (intptr_t) user_data); return vk_backend; UNUSED(params); - UNUSED(user_data); } extern "C" GGML_CALL int ggml_backend_vk_reg_devices(); GGML_CALL int ggml_backend_vk_reg_devices() { - ggml_backend_register(GGML_VK_NAME, ggml_backend_reg_vk_init, ggml_backend_vk_buffer_type(), nullptr); - return 1; + for (auto idx : 
vk_instance.device_indices) { + char name[128]; + snprintf(name, sizeof(name), "%s%ld", GGML_VK_NAME, idx); + ggml_backend_register(name, ggml_backend_reg_vk_init, ggml_backend_vk_buffer_type(idx), (void *) (intptr_t) idx); + } + return vk_instance.device_indices.size(); } // checks @@ -4874,7 +5251,7 @@ static void ggml_vk_print_tensor_area(const ggml_tensor * tensor, const void * d } } -static void ggml_vk_print_tensor(const ggml_tensor * tensor, const char * name) { +static void ggml_vk_print_tensor(ggml_backend_vk_context * ctx, const ggml_tensor * tensor, const char * name) { void * tensor_data = tensor->data; if (tensor->backend == GGML_BACKEND_GPU) { @@ -4883,7 +5260,7 @@ static void ggml_vk_print_tensor(const ggml_tensor * tensor, const char * name) ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra; - ggml_vk_buffer_read(&extra->buffer_gpu, extra->offset, tensor_data, tensor_size); + ggml_vk_buffer_read(ctx, extra->buffer_gpu, extra->offset, tensor_data, tensor_size); } std::cerr << "TENSOR CHECK " << name << " (" << tensor->name << "): " << ggml_op_name(tensor->op) << std::endl; @@ -4944,7 +5321,7 @@ void * comp_result; size_t comp_size; size_t comp_nb[GGML_MAX_DIMS]; size_t check_counter = 0; -static void ggml_vk_check_results_0(ggml_compute_params * params, ggml_tensor * tensor) { +static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_params * params, ggml_tensor * tensor) { if (params->ith != 0) { return; } @@ -4966,7 +5343,7 @@ static void ggml_vk_check_results_0(ggml_compute_params * params, ggml_tensor * /*.no_alloc =*/ false, }; - struct ggml_context * ctx = ggml_init(iparams); + struct ggml_context * ggml_ctx = ggml_init(iparams); struct ggml_tensor * src0_clone = nullptr; struct ggml_tensor * src1_clone = nullptr; @@ -4979,7 +5356,7 @@ static void ggml_vk_check_results_0(ggml_compute_params * params, ggml_tensor * void * src1_buffer; if (src0 != nullptr) { - src0_clone = ggml_dup_tensor(ctx, src0); + 
src0_clone = ggml_dup_tensor(ggml_ctx, src0); src0_size = ggml_nbytes(src0); @@ -4995,7 +5372,7 @@ static void ggml_vk_check_results_0(ggml_compute_params * params, ggml_tensor * for (int i3 = 0; i3 < src0->ne[3]; i3++) { for (int i2 = 0; i2 < src0->ne[2]; i2++) { const int idx = i3*src0->ne[2] + i2; - ggml_vk_buffer_read(&extra->buffer_gpu, offset + idx * src0->nb[2], ((char *)src0_clone->data + idx * src0_clone->nb[2]), src0->ne[1] * src0->nb[1]); + ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset + idx * src0->nb[2], ((char *)src0_clone->data + idx * src0_clone->nb[2]), src0->ne[1] * src0->nb[1]); } } @@ -5005,10 +5382,10 @@ static void ggml_vk_check_results_0(ggml_compute_params * params, ggml_tensor * src0_clone->nb[i] = src0_clone->nb[i - 1]*src0_clone->ne[i - 1]; } } else { - if (offset + src0_size >= extra->buffer_gpu.size) { - src0_size = extra->buffer_gpu.size - offset; + if (offset + src0_size >= extra->buffer_gpu->size) { + src0_size = extra->buffer_gpu->size - offset; } - ggml_vk_buffer_read(&extra->buffer_gpu, offset, src0_clone->data, src0_size); + ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset, src0_clone->data, src0_size); memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS); } } else { @@ -5016,13 +5393,13 @@ static void ggml_vk_check_results_0(ggml_compute_params * params, ggml_tensor * } if (vk_output_tensor > 0 && vk_output_tensor == check_counter) { - ggml_vk_print_tensor(src0, "src0"); + ggml_vk_print_tensor(ctx, src0, "src0"); } ggml_vk_check_tensor(std::string(ggml_op_name(tensor->op)) + "->src0", src0_clone); } if (src1 != nullptr) { - src1_clone = ggml_dup_tensor(ctx, src1); + src1_clone = ggml_dup_tensor(ggml_ctx, src1); src1_size = ggml_nbytes(src1); @@ -5038,7 +5415,7 @@ static void ggml_vk_check_results_0(ggml_compute_params * params, ggml_tensor * for (int i3 = 0; i3 < src1->ne[3]; i3++) { for (int i2 = 0; i2 < src1->ne[2]; i2++) { const int idx = i3*src1->ne[2] + i2; - ggml_vk_buffer_read(&extra->buffer_gpu, offset 
+ idx * src1->nb[2], ((char *)src1_clone->data + idx * src1_clone->nb[2]), src1->ne[1] * src1->nb[1]); + ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset + idx * src1->nb[2], ((char *)src1_clone->data + idx * src1_clone->nb[2]), src1->ne[1] * src1->nb[1]); } } @@ -5048,10 +5425,10 @@ static void ggml_vk_check_results_0(ggml_compute_params * params, ggml_tensor * src1_clone->nb[i] = src1_clone->nb[i - 1]*src1_clone->ne[i - 1]; } } else { - if (offset + src1_size >= extra->buffer_gpu.size) { - src1_size = extra->buffer_gpu.size - offset; + if (offset + src1_size >= extra->buffer_gpu->size) { + src1_size = extra->buffer_gpu->size - offset; } - ggml_vk_buffer_read(&extra->buffer_gpu, offset, src1_clone->data, src1_size); + ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset, src1_clone->data, src1_size); memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS); } } else { @@ -5059,7 +5436,7 @@ static void ggml_vk_check_results_0(ggml_compute_params * params, ggml_tensor * } if (vk_output_tensor > 0 && vk_output_tensor == check_counter) { - ggml_vk_print_tensor(src1, "src1"); + ggml_vk_print_tensor(ctx, src1, "src1"); std::cerr << "TENSOR CHECK: " << ggml_op_name(src1_clone->op) << " (check " << check_counter << ")" << std::endl; std::cerr << "src1_clone=" << tensor << " src1_clone->backend: " << src1_clone->backend << " src1_clone->type: " << ggml_type_name(src1_clone->type) << " ne0=" << src1_clone->ne[0] << " nb0=" << src1_clone->nb[0] << " ne1=" << src1_clone->ne[1] << " nb1=" << src1_clone->nb[1] << " ne2=" << src1_clone->ne[2] << " nb2=" << src1_clone->nb[2] << " ne3=" << src1_clone->ne[3] << " nb3=" << src1_clone->nb[3] << std::endl; if (src1->src[0] != nullptr) { @@ -5082,51 +5459,51 @@ static void ggml_vk_check_results_0(ggml_compute_params * params, ggml_tensor * } if (tensor->op == GGML_OP_MUL_MAT) { - tensor_clone = ggml_mul_mat(ctx, src0_clone, src1_clone); + tensor_clone = ggml_mul_mat(ggml_ctx, src0_clone, src1_clone); } else if (tensor->op == 
GGML_OP_MUL) { - tensor_clone = ggml_mul(ctx, src0_clone, src1_clone); + tensor_clone = ggml_mul(ggml_ctx, src0_clone, src1_clone); } else if (tensor->op == GGML_OP_SCALE) { - tensor_clone = ggml_scale(ctx, src0_clone, ((float *)tensor->op_params)[0]); + tensor_clone = ggml_scale(ggml_ctx, src0_clone, ((float *)tensor->op_params)[0]); } else if (tensor->op == GGML_OP_SQR) { - tensor_clone = ggml_sqr(ctx, src0_clone); + tensor_clone = ggml_sqr(ggml_ctx, src0_clone); } else if (tensor->op == GGML_OP_CLAMP) { - tensor_clone = ggml_clamp(ctx, src0_clone, ((float *)tensor->op_params)[0], ((float *)tensor->op_params)[1]); + tensor_clone = ggml_clamp(ggml_ctx, src0_clone, ((float *)tensor->op_params)[0], ((float *)tensor->op_params)[1]); } else if (tensor->op == GGML_OP_ADD) { - tensor_clone = ggml_add(ctx, src0_clone, src1_clone); + tensor_clone = ggml_add(ggml_ctx, src0_clone, src1_clone); } else if (tensor->op == GGML_OP_NORM) { - tensor_clone = ggml_norm(ctx, src0_clone, *(float *)tensor->op_params); + tensor_clone = ggml_norm(ggml_ctx, src0_clone, *(float *)tensor->op_params); } else if (tensor->op == GGML_OP_RMS_NORM) { - tensor_clone = ggml_rms_norm(ctx, src0_clone, *(float *)tensor->op_params); + tensor_clone = ggml_rms_norm(ggml_ctx, src0_clone, *(float *)tensor->op_params); } else if (tensor->op == GGML_OP_SOFT_MAX) { if (src1 != nullptr) { - tensor_clone = ggml_soft_max_ext(ctx, src0_clone, src1_clone, *(float *)tensor->op_params); + tensor_clone = ggml_soft_max_ext(ggml_ctx, src0_clone, src1_clone, *(float *)tensor->op_params); } else { - tensor_clone = ggml_soft_max(ctx, src0_clone); + tensor_clone = ggml_soft_max(ggml_ctx, src0_clone); } } else if (tensor->op == GGML_OP_DIAG_MASK_INF) { - tensor_clone = ggml_diag_mask_inf(ctx, src0_clone, *(float *)tensor->op_params); + tensor_clone = ggml_diag_mask_inf(ggml_ctx, src0_clone, *(float *)tensor->op_params); } else if (tensor->op == GGML_OP_ROPE) { const int n_dims = ((int32_t *) tensor->op_params)[1]; const int 
mode = ((int32_t *) tensor->op_params)[2]; - const int n_ctx = ((int32_t *) tensor->op_params)[3]; - const int n_orig_ctx = ((int32_t *) tensor->op_params)[4]; + const int n_ggml_ctx = ((int32_t *) tensor->op_params)[3]; + const int n_orig_ggml_ctx = ((int32_t *) tensor->op_params)[4]; float freq_base = ((float *) tensor->op_params)[5]; float freq_scale = ((float *) tensor->op_params)[6]; float ext_factor = ((float *) tensor->op_params)[7]; float attn_factor = ((float *) tensor->op_params)[8]; float beta_fast = ((float *) tensor->op_params)[9]; float beta_slow = ((float *) tensor->op_params)[10]; - tensor_clone = ggml_rope_custom(ctx, src0_clone, src1_clone, n_dims, mode, n_ctx, n_orig_ctx, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow); + tensor_clone = ggml_rope_custom(ggml_ctx, src0_clone, src1_clone, n_dims, mode, n_ggml_ctx, n_orig_ggml_ctx, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow); } else if (tensor->op == GGML_OP_UNARY) { switch (ggml_get_unary_op(tensor)) { case GGML_UNARY_OP_SILU: - tensor_clone = ggml_silu(ctx, src0_clone); + tensor_clone = ggml_silu(ggml_ctx, src0_clone); break; case GGML_UNARY_OP_GELU: - tensor_clone = ggml_gelu(ctx, src0_clone); + tensor_clone = ggml_gelu(ggml_ctx, src0_clone); break; case GGML_UNARY_OP_RELU: - tensor_clone = ggml_relu(ctx, src0_clone); + tensor_clone = ggml_relu(ggml_ctx, src0_clone); break; default: std::cerr << "Missing vk_check_results OP: " << ggml_op_name(tensor->op) << std::endl; @@ -5134,40 +5511,40 @@ static void ggml_vk_check_results_0(ggml_compute_params * params, ggml_tensor * } } else if (tensor->op == GGML_OP_CPY || tensor->op == GGML_OP_DUP) { if (src1 == nullptr) { - tensor_clone = ggml_dup(ctx, src0_clone); + tensor_clone = ggml_dup(ggml_ctx, src0_clone); tensor_clone->type = tensor->type; } else { - tensor_clone = ggml_cpy(ctx, src0_clone, src1_clone); + tensor_clone = ggml_cpy(ggml_ctx, src0_clone, src1_clone); } } else if (tensor->op == 
GGML_OP_CONT) { - tensor_clone = ggml_cont_4d(ctx, src0_clone, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]); + tensor_clone = ggml_cont_4d(ggml_ctx, src0_clone, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]); } else if (tensor->op == GGML_OP_RESHAPE) { - tensor_clone = ggml_reshape_4d(ctx, src0_clone, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]); + tensor_clone = ggml_reshape_4d(ggml_ctx, src0_clone, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]); } else if (tensor->op == GGML_OP_VIEW) { - tensor_clone = ggml_view_4d(ctx, src0_clone, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3], tensor->nb[1], tensor->nb[2], tensor->nb[3], ((int32_t *) tensor->op_params)[0]); + tensor_clone = ggml_view_4d(ggml_ctx, src0_clone, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3], tensor->nb[1], tensor->nb[2], tensor->nb[3], ((int32_t *) tensor->op_params)[0]); } else if (tensor->op == GGML_OP_PERMUTE) { int32_t * params = (int32_t *)tensor->op_params; - tensor_clone = ggml_permute(ctx, src0_clone, params[0], params[1], params[2], params[3]); + tensor_clone = ggml_permute(ggml_ctx, src0_clone, params[0], params[1], params[2], params[3]); } else if (tensor->op == GGML_OP_TRANSPOSE) { - tensor_clone = ggml_transpose(ctx, src0_clone); + tensor_clone = ggml_transpose(ggml_ctx, src0_clone); } else { std::cerr << "Missing vk_check_results OP: " << ggml_op_name(tensor->op) << std::endl; GGML_ASSERT(false); } // Disable vulkan here to avoid the hooks in ggml.c - vk_disable = true; + ctx->disable = true; - ggml_cgraph * cgraph = ggml_new_graph(ctx); + ggml_cgraph * cgraph = ggml_new_graph(ggml_ctx); ggml_build_forward_expand(cgraph, tensor_clone); - ggml_graph_compute_with_ctx(ctx, cgraph, 8); + ggml_graph_compute_with_ctx(ggml_ctx, cgraph, 8); - vk_disable = false; + ctx->disable = false; ggml_vk_check_tensor(ggml_op_name(tensor->op), tensor_clone); if (vk_output_tensor > 0 && vk_output_tensor == check_counter) { - 
ggml_vk_print_tensor(tensor_clone, "tensor_clone"); + ggml_vk_print_tensor(ctx, tensor_clone, "tensor_clone"); } comp_size = ggml_nbytes(tensor_clone); @@ -5183,10 +5560,10 @@ static void ggml_vk_check_results_0(ggml_compute_params * params, ggml_tensor * free(src1_buffer); } - ggml_free(ctx); + ggml_free(ggml_ctx); } -void ggml_vk_check_results_1(ggml_compute_params * params, ggml_tensor * tensor) { +static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_compute_params * params, ggml_tensor * tensor) { if (params->ith != 0) { return; } @@ -5208,11 +5585,11 @@ void ggml_vk_check_results_1(ggml_compute_params * params, ggml_tensor * tensor) ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra; - if (extra->offset + tensor_size >= extra->buffer_gpu.size) { - tensor_size = extra->buffer_gpu.size - (extra->offset); + if (extra->offset + tensor_size >= extra->buffer_gpu->size) { + tensor_size = extra->buffer_gpu->size - (extra->offset); } - ggml_vk_buffer_read(&extra->buffer_gpu, extra->offset, tensor_data, tensor_size); + ggml_vk_buffer_read(ctx, extra->buffer_gpu, extra->offset, tensor_data, tensor_size); } float first_error_result = -1.0f; @@ -5339,4 +5716,10 @@ void ggml_vk_check_results_1(ggml_compute_params * params, ggml_tensor * tensor) free(tensor_data); } } + +void ggml_vk_check_results_1_cpu_assist(struct ggml_compute_params * params, struct ggml_tensor * tensor) { + ggml_backend_vk_context * ctx = &vk_instance.contexts[0]; + + ggml_vk_check_results_0(ctx, params, tensor); +} #endif diff --git a/ggml-vulkan.h b/ggml-vulkan.h index eb8a148e2..9645126b4 100644 --- a/ggml-vulkan.h +++ b/ggml-vulkan.h @@ -8,24 +8,29 @@ extern "C" { #endif #define GGML_VK_NAME "Vulkan" +#define GGML_VK_MAX_DEVICES 16 -GGML_API void ggml_vk_init(void); +GGML_API void ggml_vk_init_cpu_assist(void); -GGML_API void ggml_vk_preallocate_buffers_graph(struct ggml_tensor * node); -GGML_API void ggml_vk_preallocate_buffers(void); -GGML_API void 
ggml_vk_build_graph(struct ggml_tensor * node, bool last_node); -GGML_API bool ggml_vk_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor); +GGML_API void ggml_vk_preallocate_buffers_graph_cpu_assist(struct ggml_tensor * node); +GGML_API void ggml_vk_preallocate_buffers_cpu_assist(void); +GGML_API void ggml_vk_build_graph_cpu_assist(struct ggml_tensor * node, bool last_node); +GGML_API bool ggml_vk_compute_forward_cpu_assist(struct ggml_compute_params * params, struct ggml_tensor * tensor); #ifdef GGML_VULKAN_CHECK_RESULTS -void ggml_vk_check_results_1(struct ggml_compute_params * params, struct ggml_tensor * tensor); +void ggml_vk_check_results_1_cpu_assist(struct ggml_compute_params * params, struct ggml_tensor * tensor); #endif -GGML_API void ggml_vk_graph_cleanup(void); +GGML_API void ggml_vk_graph_cleanup_cpu_assist(void); +GGML_API void ggml_vk_free_cpu_assist(void); // backend API -GGML_API GGML_CALL ggml_backend_t ggml_backend_vk_init(void); +GGML_API GGML_CALL ggml_backend_t ggml_backend_vk_init(size_t dev_num); GGML_API GGML_CALL bool ggml_backend_is_vk(ggml_backend_t backend); +GGML_API GGML_CALL int ggml_backend_vk_get_device_count(void); +GGML_API GGML_CALL void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size); +GGML_API GGML_CALL void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total); -GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(void); +GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num); // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void); diff --git a/ggml.c b/ggml.c index b9ec0c981..f783a6fd3 100644 --- a/ggml.c +++ b/ggml.c @@ -2343,7 +2343,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) { #elif defined(GGML_USE_CLBLAST) ggml_cl_init(); 
#elif defined(GGML_USE_VULKAN) - ggml_vk_init(); + ggml_vk_init_cpu_assist(); #elif defined(GGML_USE_SYCL) ggml_init_sycl(); #endif @@ -14850,10 +14850,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_CPU); GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_CPU); #elif defined(GGML_USE_VULKAN) - const bool skip_cpu = ggml_vk_compute_forward(params, tensor); + const bool skip_cpu = ggml_vk_compute_forward_cpu_assist(params, tensor); #ifdef GGML_VULKAN_CHECK_RESULTS if (skip_cpu) { - ggml_vk_check_results_1(params, tensor); + ggml_vk_check_results_1_cpu_assist(params, tensor); } #endif if (skip_cpu) { @@ -17269,12 +17269,12 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) { #ifdef GGML_USE_VULKAN for (int i = 0; i < cgraph->n_nodes; i++) { - ggml_vk_preallocate_buffers_graph(cgraph->nodes[i]); + ggml_vk_preallocate_buffers_graph_cpu_assist(cgraph->nodes[i]); } - ggml_vk_preallocate_buffers(); + ggml_vk_preallocate_buffers_cpu_assist(); for (int i = 0; i < cgraph->n_nodes; i++) { - ggml_vk_build_graph(cgraph->nodes[i], i == cgraph->n_nodes - 1); + ggml_vk_build_graph_cpu_assist(cgraph->nodes[i], i == cgraph->n_nodes - 1); } #endif @@ -17330,7 +17330,7 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) { } #ifdef GGML_USE_VULKAN - ggml_vk_graph_cleanup(); + ggml_vk_graph_cleanup_cpu_assist(); #endif // performance stats (graph) diff --git a/llama.cpp b/llama.cpp index f3c5146d1..c45ae1d50 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1355,7 +1355,7 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_offload(int gpu) { #elif defined(GGML_USE_CUBLAS) buft = ggml_backend_cuda_buffer_type(gpu); #elif defined(GGML_USE_VULKAN) - buft = ggml_backend_vk_buffer_type(); + buft = ggml_backend_vk_buffer_type(gpu); #elif defined(GGML_USE_SYCL) buft = 
ggml_backend_sycl_buffer_type(gpu); #elif defined(GGML_USE_CLBLAST) @@ -1392,6 +1392,33 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_split(int fallback_g GGML_UNUSED(tensor_split); } +static size_t llama_get_device_count() { +#if defined(GGML_USE_CUBLAS) + return ggml_backend_cuda_get_device_count(); +#elif defined(GGML_USE_VULKAN) + return ggml_backend_vk_get_device_count(); +#else + return 1; +#endif +} + +static size_t llama_get_device_memory(int device) { +#if defined(GGML_USE_CUBLAS) + size_t total; + size_t free; + ggml_backend_cuda_get_device_memory(device, &total, &free); + return free; +#elif defined(GGML_USE_VULKAN) + size_t total; + size_t free; + ggml_backend_vk_get_device_memory(device, &total, &free); + return free; +#else + return 1; + GGML_UNUSED(device); +#endif +} + // // globals // @@ -1763,6 +1790,10 @@ struct llama_context { ggml_backend_free(backend); } +#ifdef GGML_USE_VULKAN + ggml_vk_free_cpu_assist(); +#endif + ggml_backend_buffer_free(buf_input); ggml_free(ctx_input); } @@ -3436,22 +3467,18 @@ static bool llm_load_tensors( model.buft_layer[i] = llama_default_buffer_type_cpu(true); } -#ifdef GGML_USE_CUBLAS if (split_mode == LLAMA_SPLIT_LAYER) { // calculate the split points - int device_count = ggml_backend_cuda_get_device_count(); + int device_count = llama_get_device_count(); bool all_zero = tensor_split == nullptr || std::all_of(tensor_split, tensor_split + device_count, [](float x) { return x == 0.0f; }); - float splits[GGML_CUDA_MAX_DEVICES]; + std::vector splits(device_count); if (all_zero) { // default split, by free memory for (int i = 0; i < device_count; ++i) { - size_t total; - size_t free; - ggml_backend_cuda_get_device_memory(i, &total, &free); - splits[i] = free; + splits[i] = llama_get_device_memory(i); } } else { - std::copy(tensor_split, tensor_split + device_count, splits); + std::copy(tensor_split, tensor_split + device_count, splits.begin()); } // sum and normalize the splits to get the split points 
@@ -3467,19 +3494,17 @@ static bool llm_load_tensors( // assign the repeating layers to the devices according to the splits int act_gpu_layers = std::min(n_gpu_layers, (int)n_layer + 1); for (int64_t i = i_gpu_start; i < n_layer; ++i) { - int layer_gpu = std::upper_bound(splits, splits + device_count, float(i - i_gpu_start)/act_gpu_layers) - splits; + int layer_gpu = std::upper_bound(splits.begin(), splits.begin() + device_count, float(i - i_gpu_start)/act_gpu_layers) - splits.begin(); model.buft_layer[i] = llama_default_buffer_type_offload(layer_gpu); } // assign the output layer if (n_gpu_layers > n_layer) { - int layer_gpu = std::upper_bound(splits, splits + device_count, float(act_gpu_layers - 1)/act_gpu_layers) - splits; + int layer_gpu = std::upper_bound(splits.begin(), splits.begin() + device_count, float(act_gpu_layers - 1)/act_gpu_layers) - splits.begin(); model.buft_output = llama_default_buffer_type_offload(layer_gpu); } else { model.buft_output = llama_default_buffer_type_cpu(true); } - } else -#endif - { + } else { ggml_backend_buffer_type_t split_buft; if (split_mode == LLAMA_SPLIT_ROW) { split_buft = llama_default_buffer_type_split(main_gpu, tensor_split); @@ -10483,6 +10508,8 @@ size_t llama_max_devices(void) { return GGML_CUDA_MAX_DEVICES; #elif defined(GGML_USE_SYCL) return GGML_SYCL_MAX_DEVICES; +#elif defined(GGML_USE_VULKAN) + return GGML_VK_MAX_DEVICES; #else return 1; #endif @@ -10690,13 +10717,15 @@ struct llama_context * llama_new_context_with_model( } #elif defined(GGML_USE_VULKAN) if (model->n_gpu_layers > 0) { - ggml_backend_t backend = ggml_backend_vk_init(); - if (backend == nullptr) { - LLAMA_LOG_ERROR("%s: failed to initialize Vulkan backend\n", __func__); - llama_free(ctx); - return nullptr; + for (int device = 0; device < ggml_backend_vk_get_device_count(); ++device) { + ggml_backend_t backend = ggml_backend_vk_init(device); + if (backend == nullptr) { + LLAMA_LOG_ERROR("%s: failed to initialize Vulkan%d backend\n", __func__, 
device); + llama_free(ctx); + return nullptr; + } + ctx->backends.push_back(backend); } - ctx->backends.push_back(backend); } #elif defined(GGML_USE_SYCL) if (model->n_gpu_layers > 0) { From 0ef46da632c32faa1a538e5dc180994e8bbb46e1 Mon Sep 17 00:00:00 2001 From: Xiao-Yong Jin Date: Wed, 7 Feb 2024 02:17:25 -0600 Subject: [PATCH 51/94] llava-cli : always tokenize special tokens (#5382) * llava-cli: tokenize special tokens in prompt * llava-cli: use the escape CLI argument, remove incomplete separate escaping process --- examples/llava/llava-cli.cpp | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/examples/llava/llava-cli.cpp b/examples/llava/llava-cli.cpp index 6ac70ba69..031e9806d 100644 --- a/examples/llava/llava-cli.cpp +++ b/examples/llava/llava-cli.cpp @@ -34,7 +34,7 @@ static bool eval_id(struct llama_context * ctx_llama, int id, int * n_past) { static bool eval_string(struct llama_context * ctx_llama, const char* str, int n_batch, int * n_past, bool add_bos){ std::string str2 = str; - std::vector embd_inp = ::llama_tokenize(ctx_llama, str2, add_bos); + std::vector embd_inp = ::llama_tokenize(ctx_llama, str2, add_bos, true); eval_tokens(ctx_llama, embd_inp, n_batch, n_past); return true; } @@ -152,20 +152,8 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_ size_t image_pos = prompt.find(""); if (image_pos != std::string::npos) { // new templating mode: Provide the full prompt including system message and use as a placeholder for the image - system_prompt = prompt.substr(0, image_pos); user_prompt = prompt.substr(image_pos + std::string("").length()); - // We replace \n with actual newlines in user_prompt, just in case -e was not used in templating string - size_t pos = 0; - while ((pos = user_prompt.find("\\n", pos)) != std::string::npos) { - user_prompt.replace(pos, 2, "\n"); - pos += 1; // Advance past the replaced newline - } - while ((pos = system_prompt.find("\\n", pos)) != 
std::string::npos) { - system_prompt.replace(pos, 2, "\n"); - pos += 1; // Advance past the replaced newline - } - printf("system_prompt: %s\n", system_prompt.c_str()); printf("user_prompt: %s\n", user_prompt.c_str()); } else { From 10afa6f1d11ebc9fcc1085f468170002cbf6e2b5 Mon Sep 17 00:00:00 2001 From: Neo Zhang Jianyu Date: Wed, 7 Feb 2024 18:16:55 +0800 Subject: [PATCH 52/94] [SYCL] update install make by w64devkit (#5297) --- README-sycl.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/README-sycl.md b/README-sycl.md index 7aa4274a9..e3a8e726e 100644 --- a/README-sycl.md +++ b/README-sycl.md @@ -311,15 +311,13 @@ Output (example): a. Download & install cmake for Windows: https://cmake.org/download/ -b. Download & install make for Windows provided by mingw-w64 +b. Download & install mingw-w64 make for Windows provided by w64devkit -- Download binary package for Windows in https://github.com/niXman/mingw-builds-binaries/releases. +- Download the latest fortran version of [w64devkit](https://github.com/skeeto/w64devkit/releases). - Like [x86_64-13.2.0-release-win32-seh-msvcrt-rt_v11-rev1.7z](https://github.com/niXman/mingw-builds-binaries/releases/download/13.2.0-rt_v11-rev1/x86_64-13.2.0-release-win32-seh-msvcrt-rt_v11-rev1.7z). +- Extract `w64devkit` on your pc. -- Unzip the binary package. In the **bin** sub-folder and rename **xxx-make.exe** to **make.exe**. - -- Add the **bin** folder path in the Windows system PATH environment. +- Add the **bin** folder path in the Windows system PATH environment, like `C:\xxx\w64devkit\bin\`. 
### Build locally: From aa7ab99be29b633263803f2e185265734c2d9427 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Wed, 7 Feb 2024 12:40:26 +0100 Subject: [PATCH 53/94] CUDA: fixed mmvq kernel for bs 2,3,4 and -sm row (#5386) --- ggml-cuda.cu | 66 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 27 deletions(-) diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 3b828375e..db9da2459 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -5313,7 +5313,7 @@ template static __global__ void template static __global__ void mul_mat_vec_q( const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst, - const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y_par) { + const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y_par, const int nrows_dst) { const int ncols_y = ncols_y_template != 0 ? ncols_y_template : ncols_y_par; @@ -5352,7 +5352,7 @@ static __global__ void mul_mat_vec_q( tmp[j] = warp_reduce_sum(tmp[j]); if (threadIdx.x == 0) { - dst[j*nrows_x + row] = tmp[j]; + dst[j*nrows_dst + row] = tmp[j]; } } } @@ -6828,7 +6828,7 @@ static void convert_mul_mat_vec_f16_cuda(const void * vx, const dfloat * y, floa template static void mul_mat_vec_q_cuda( const void * vx, const void * vy, float * dst, - const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, cudaStream_t stream) { + const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst, cudaStream_t stream) { GGML_ASSERT(ncols_x % qk == 0); GGML_ASSERT(ncols_y <= 4); @@ -6839,40 +6839,40 @@ static void mul_mat_vec_q_cuda( switch (ncols_y) { case 1: mul_mat_vec_q<1, qk, qi, block_q_t, vdr, vec_dot> - <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); break; case 2: mul_mat_vec_q<2, qk, qi, block_q_t, vdr, vec_dot> - <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); + <<>>(vx, 
vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); break; case 3: mul_mat_vec_q<3, qk, qi, block_q_t, vdr, vec_dot> - <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); break; case 4: mul_mat_vec_q<4, qk, qi, block_q_t, vdr, vec_dot> - <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); break; // case 5: // mul_mat_vec_q<5, qk, qi, block_q_t, vdr, vec_dot> - // <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); + // <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); // break; // case 6: // mul_mat_vec_q<6, qk, qi, block_q_t, vdr, vec_dot> - // <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); + // <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); // break; // case 7: // mul_mat_vec_q<7, qk, qi, block_q_t, vdr, vec_dot> - // <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); + // <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); // break; // case 8: // mul_mat_vec_q<8, qk, qi, block_q_t, vdr, vec_dot> - // <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); + // <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); // break; default: GGML_ASSERT(false); // mul_mat_vec_q<0, qk, qi, block_q_t, vdr, vec_dot> - // <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y); + // <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); break; } } @@ -8391,7 +8391,7 @@ static void ggml_cuda_op_mul_mat_q( CUDA_CHECK(cudaGetDevice(&id)); // the main device has a larger memory buffer to hold the results from all GPUs - // nrows_dst == nrows of the matrix that the dequantize_mul_mat kernel writes into + // nrows_dst == nrows of the matrix that the kernel writes into const int64_t nrows_dst = dst->backend == GGML_BACKEND_GPU && id == g_main_device ? 
ne0 : row_diff; switch (src0->type) { @@ -8525,58 +8525,70 @@ static void ggml_cuda_op_mul_mat_vec_q( const int64_t ne00 = src0->ne[0]; const int64_t row_diff = row_high - row_low; + const int64_t ne10 = src1->ne[0]; + GGML_ASSERT(ne10 % QK8_1 == 0); + + const int64_t ne0 = dst->ne[0]; + + int id; + CUDA_CHECK(cudaGetDevice(&id)); + + // the main device has a larger memory buffer to hold the results from all GPUs + // nrows_dst == nrows of the matrix that the kernel writes into + const int64_t nrows_dst = dst->backend == GGML_BACKEND_GPU && id == g_main_device ? ne0 : row_diff; + switch (src0->type) { case GGML_TYPE_Q4_0: mul_mat_vec_q_cuda - (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, stream); + (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, nrows_dst, stream); break; case GGML_TYPE_Q4_1: mul_mat_vec_q_cuda - (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, stream); + (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, nrows_dst, stream); break; case GGML_TYPE_Q5_0: mul_mat_vec_q_cuda - (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, stream); + (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, nrows_dst, stream); break; case GGML_TYPE_Q5_1: mul_mat_vec_q_cuda - (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, stream); + (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, nrows_dst, stream); break; case GGML_TYPE_Q8_0: mul_mat_vec_q_cuda - (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, stream); + (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, nrows_dst, stream); break; case GGML_TYPE_Q2_K: mul_mat_vec_q_cuda - (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, stream); + (src0_dd_i, 
src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, nrows_dst, stream); break; case GGML_TYPE_Q3_K: mul_mat_vec_q_cuda - (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, stream); + (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, nrows_dst, stream); break; case GGML_TYPE_Q4_K: mul_mat_vec_q_cuda - (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, stream); + (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, nrows_dst, stream); break; case GGML_TYPE_Q5_K: mul_mat_vec_q_cuda - (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, stream); + (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, nrows_dst, stream); break; case GGML_TYPE_Q6_K: mul_mat_vec_q_cuda - (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, stream); + (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, nrows_dst, stream); break; case GGML_TYPE_IQ2_XXS: mul_mat_vec_q_cuda - (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, stream); + (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, nrows_dst, stream); break; case GGML_TYPE_IQ2_XS: mul_mat_vec_q_cuda - (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, stream); + (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, nrows_dst, stream); break; case GGML_TYPE_IQ3_XXS: mul_mat_vec_q_cuda - (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, stream); + (src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, nrows_dst, stream); break; default: GGML_ASSERT(false); @@ -9909,7 +9921,7 @@ static void ggml_cuda_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1 
ggml_cuda_op_mul_mat(src0, src1, dst, ggml_cuda_op_dequantize_mul_mat_vec, false); } } else { - if (src1->ne[1] <= 4 && min_compute_capability >= MIN_CC_DP4A && ggml_is_quantized(src0->type)) { + if (src1->ne[1] <= 4 && min_compute_capability >= MIN_CC_DP4A && ggml_is_quantized(src0->type) && src1->type == GGML_TYPE_F32) { ggml_cuda_op_mul_mat(src0, src1, dst, ggml_cuda_op_mul_mat_vec_q, true); } else if (use_mul_mat_q) { ggml_cuda_op_mul_mat(src0, src1, dst, ggml_cuda_op_mul_mat_q, true); From b906596bb775b17656c2e51d5ab1b347faab6860 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Tom=C5=A1=C3=ADk?= Date: Wed, 7 Feb 2024 19:44:52 +0100 Subject: [PATCH 54/94] Add Ava in the list of llama.cpp UIs (#4362) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 0509b0ba1..7e1187349 100644 --- a/README.md +++ b/README.md @@ -150,6 +150,7 @@ Unless otherwise noted these projects are open-source with permissive licensing: - [ollama/ollama](https://github.com/ollama/ollama) - [oobabooga/text-generation-webui](https://github.com/oobabooga/text-generation-webui) (AGPL) - [psugihara/FreeChat](https://github.com/psugihara/FreeChat) +- [cztomsik/ava](https://github.com/cztomsik/ava) (MIT) - [ptsochantaris/emeltal](https://github.com/ptsochantaris/emeltal) - [pythops/tenere](https://github.com/pythops/tenere) (AGPL) - [semperai/amica](https://github.com/semperai/amica) From 8c933b70c21e05b685d476d0a1f36b34cbda7365 Mon Sep 17 00:00:00 2001 From: Ebey Abraham Date: Wed, 7 Feb 2024 21:11:30 +0000 Subject: [PATCH 55/94] fix typo in readme (#5399) Co-authored-by: Ebey Abraham --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7e1187349..66166c01b 100644 --- a/README.md +++ b/README.md @@ -680,7 +680,7 @@ python3 -m pip install -r requirements.txt python3 convert.py models/mymodel/ # [Optional] for models using BPE tokenizers -python convert.py models/mymodel/ --vocabtype bpe +python 
convert.py models/mymodel/ --vocab-type bpe # quantize the model to 4-bits (using Q4_K_M method) ./quantize ./models/mymodel/ggml-model-f16.gguf ./models/mymodel/ggml-model-Q4_K_M.gguf Q4_K_M From c4fbb6717c684196bd13b72d21747557130914e8 Mon Sep 17 00:00:00 2001 From: Michael Podvitskiy Date: Wed, 7 Feb 2024 22:39:23 +0100 Subject: [PATCH 56/94] CMAKE_OSX_ARCHITECTURES for MacOS cross compilation (#5393) Co-authored-by: Jared Van Bortel --- CMakeLists.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 427015be5..a544f2da6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -850,7 +850,9 @@ endif() set(ARCH_FLAGS "") -if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") OR ("${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "arm64")) +if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR + (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND + CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$")) message(STATUS "ARM detected") if (MSVC) add_compile_definitions(__ARM_NEON) @@ -876,7 +878,9 @@ if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATC list(APPEND ARCH_FLAGS -mno-unaligned-access) endif() endif() -elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "^(x86_64|i686|amd64|x64)$" ) +elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR + (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND + CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64)$")) message(STATUS "x86 detected") if (MSVC) # instruction set detection for MSVC only From 8504d2d0da8cc7a1f2eee0e9e56949f960510b75 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 8 Feb 2024 09:46:47 +0200 Subject: [PATCH 57/94] tests : .gitignore obj files --- tests/.gitignore 
| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/.gitignore b/tests/.gitignore index 092dce742..9427cf13d 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -1,3 +1,3 @@ * !*.* -test-c.o +*.o From 26d4efd11e48908e14e2ee9471a7fc4c57079a1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Thu, 8 Feb 2024 09:46:30 +0100 Subject: [PATCH 58/94] sampling: fix top_k <= 0 (#5388) * sampling: fix top_k <= 0 * Update llama.cpp Co-authored-by: Georgi Gerganov --------- Co-authored-by: Georgi Gerganov --- common/sampling.cpp | 2 +- llama.cpp | 4 ++++ tests/test-sampling.cpp | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/common/sampling.cpp b/common/sampling.cpp index e8675a8c0..844ad7c53 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -132,7 +132,7 @@ static void sampler_queue( const float temp = params.temp; const float dynatemp_range = params.dynatemp_range; const float dynatemp_exponent = params.dynatemp_exponent; - const int32_t top_k = params.top_k <= 0 ? 
n_vocab : params.top_k; + const int32_t top_k = params.top_k; const float top_p = params.top_p; const float min_p = params.min_p; const float tfs_z = params.tfs_z; diff --git a/llama.cpp b/llama.cpp index c45ae1d50..f8f5796a4 100644 --- a/llama.cpp +++ b/llama.cpp @@ -8585,6 +8585,10 @@ void llama_sample_top_k(struct llama_context * ctx, llama_token_data_array * can // } const int64_t t_start_sample_us = ggml_time_us(); + + if (k <= 0) { + k = candidates->size; + } k = std::max(k, (int) min_keep); k = std::min(k, (int) candidates->size); diff --git a/tests/test-sampling.cpp b/tests/test-sampling.cpp index c3b3d6629..6374958fe 100644 --- a/tests/test-sampling.cpp +++ b/tests/test-sampling.cpp @@ -235,6 +235,8 @@ int main(void) { test_top_k({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f}, 1); test_top_k({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f}, 3); + test_top_k({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f, 0.1f}, 4); + test_top_k({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f, 0.1f}, 0); test_top_p({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f}, 0); test_top_p({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f}, 0.7f); From a6e514a85f0fda38ff78ec91782877ea3d19ed98 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Thu, 8 Feb 2024 09:58:19 +0100 Subject: [PATCH 59/94] llava: fix typo/formatting in README.md (#5405) This commit fixes a typo in the README.md file for the llava example which is causing the formatting to look a little off: Clone llava-v15-7b`` and clip-vit-large-patch14-336`` locally Signed-off-by: Daniel Bevenius --- examples/llava/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/llava/README.md b/examples/llava/README.md index 323c5fdd0..295181a34 100644 --- a/examples/llava/README.md +++ b/examples/llava/README.md @@ -21,7 +21,7 @@ After building, run: `./llava-cli` to see the usage. 
For example: ## Model conversion -- Clone `llava-v15-7b`` and `clip-vit-large-patch14-336`` locally: +- Clone `llava-v15-7b` and `clip-vit-large-patch14-336` locally: ```sh git clone https://huggingface.co/liuhaotian/llava-v1.5-7b From 4aa43fab569215a13495a7f1a0f8afc541b16d03 Mon Sep 17 00:00:00 2001 From: runfuture Date: Thu, 8 Feb 2024 18:36:19 +0800 Subject: [PATCH 60/94] llama : fix MiniCPM (#5392) * fix bug for norm_rms_eps missing * to align with the same order as convert.py for model write * fix: undo HF models permute tensor * update for flake8 lint --- convert-hf-to-gguf.py | 63 +++++++++++++++++++++++++++++++++++++++++-- llama.cpp | 2 ++ 2 files changed, 63 insertions(+), 2 deletions(-) diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py index 829d68368..0d4ea03b4 100755 --- a/convert-hf-to-gguf.py +++ b/convert-hf-to-gguf.py @@ -1078,17 +1078,76 @@ class MiniCPMModel(Model): self.gguf_writer.add_name("MiniCPM") self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"]) self.gguf_writer.add_embedding_length(self.hparams["hidden_size"]) - self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"]) self.gguf_writer.add_block_count(block_count) + self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"]) + self.gguf_writer.add_rope_dimension_count(self.hparams["hidden_size"] // self.hparams["num_attention_heads"]) self.gguf_writer.add_head_count(self.hparams["num_attention_heads"]) self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"]) self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"]) self.gguf_writer.add_file_type(self.ftype) - self.gguf_writer.add_rope_dimension_count(self.hparams["hidden_size"] // self.hparams["num_attention_heads"]) def set_vocab(self): self._set_vocab_hf() + def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor: + if n_kv_head is not None and n_head != n_kv_head: + n_head //= n_kv_head + + return ( 
+ weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:]) + .swapaxes(1, 2) + .reshape(weights.shape) + ) + + def write_tensors(self): + block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer"))) + tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count) + n_head = self.hparams.get("num_attention_heads") + n_kv_head = self.hparams.get("num_key_value_heads") + for name, data_torch in self.get_tensors(): + # we don't need these + if name.endswith((".attention.masked_bias", ".attention.bias", ".attention.rotary_emb.inv_freq")): + continue + + old_dtype = data_torch.dtype + + # convert any unsupported data types to float32 + if data_torch.dtype not in (torch.float16, torch.float32): + data_torch = data_torch.to(torch.float32) + + # HF models permute some of the tensors, so we need to undo that + if name.endswith(("q_proj.weight")): + data_torch = self._reverse_hf_permute(data_torch, n_head, n_head) + if name.endswith(("k_proj.weight")): + data_torch = self._reverse_hf_permute(data_torch, n_head, n_kv_head) + + data = data_torch.squeeze().numpy() + + # map tensor names + new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) + if new_name is None: + print(f"Can not map tensor {name!r}") + sys.exit() + + n_dims = len(data.shape) + data_dtype = data.dtype + + # if f32 desired, convert any float16 to float32 + if self.ftype == 0 and data_dtype == np.float16: + data = data.astype(np.float32) + + # TODO: Why cant we use these float16 as-is? 
There should be not reason to store float16 as float32 + if self.ftype == 1 and data_dtype == np.float16 and n_dims == 1: + data = data.astype(np.float32) + + # if f16 desired, convert any float32 2-dim weight tensors to float16 + if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: + data = data.astype(np.float16) + + print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") + + self.gguf_writer.add_tensor(new_name, data) + class QwenModel(Model): @staticmethod diff --git a/llama.cpp b/llama.cpp index f8f5796a4..552e0d02e 100644 --- a/llama.cpp +++ b/llama.cpp @@ -2947,6 +2947,8 @@ static void llm_load_hparams( } break; case LLM_ARCH_MINICPM: { + ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); + switch (hparams.n_layer) { case 40: model.type = e_model::MODEL_2B; break; default: model.type = e_model::MODEL_UNKNOWN; From b7b74cef36a93ae01e0b9af8986d131761742d0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Thu, 8 Feb 2024 11:36:54 +0100 Subject: [PATCH 61/94] fix trailing whitespace (#5407) --- llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama.cpp b/llama.cpp index 552e0d02e..89acafbc3 100644 --- a/llama.cpp +++ b/llama.cpp @@ -8587,7 +8587,7 @@ void llama_sample_top_k(struct llama_context * ctx, llama_token_data_array * can // } const int64_t t_start_sample_us = ggml_time_us(); - + if (k <= 0) { k = candidates->size; } From ff4ff05c5ff4311c05a8ce1f984c7d8def4f07a5 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Thu, 8 Feb 2024 15:20:03 +0100 Subject: [PATCH 62/94] llava : add missing .py, and fix paths in README.md (#5414) This commit adds the missing .py extension to the convert-image-encoder-to-gguf script. It also fixes the paths for the `model` and `mmproj` options in the example llava-cli command. 
Signed-off-by: Daniel Bevenius --- examples/llava/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/llava/README.md b/examples/llava/README.md index 295181a34..721d5e613 100644 --- a/examples/llava/README.md +++ b/examples/llava/README.md @@ -14,7 +14,7 @@ Build with cmake or run `make llava-cli` to build it. After building, run: `./llava-cli` to see the usage. For example: ```sh -./llava-cli -m llava-v1.5-7b/ggml-model-q5_k.gguf --mmproj llava-v1.5-7b/mmproj-model-f16.gguf --image path/to/an/image.jpg +./llava-cli -m ../llava-v1.5-7b/ggml-model-f16.gguf --mmproj ../llava-v1.5-7b/mmproj-model-f16.gguf --image path/to/an/image.jpg ``` **note**: A lower temperature like 0.1 is recommended for better quality. add `--temp 0.1` to the command to do so. @@ -38,7 +38,7 @@ python ./examples/llava/llava-surgery.py -m ../llava-v1.5-7b 3. Use `convert-image-encoder-to-gguf.py` to convert the LLaVA image encoder to GGUF: ```sh -python ./examples/llava/convert-image-encoder-to-gguf -m ../clip-vit-large-patch14-336 --llava-projector ../llava-v1.5-7b/llava.projector --output-dir ../llava-v1.5-7b +python ./examples/llava/convert-image-encoder-to-gguf.py -m ../clip-vit-large-patch14-336 --llava-projector ../llava-v1.5-7b/llava.projector --output-dir ../llava-v1.5-7b ``` 4. 
Use `convert.py` to convert the LLaMA part of LLaVA to GGUF: From 6e99f2a04f1871d637dd77eb4d81de31a5510253 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> Date: Thu, 8 Feb 2024 22:39:10 +0530 Subject: [PATCH 63/94] Fix f16_sycl cpy call from Arc (#5411) * fix f16_sycl cpy call * rm old logic * add fp16 build CI * use macro * format fix --- .github/workflows/build.yml | 41 +++++++++++++++++++++++++++++++++++++ ggml-sycl.cpp | 8 +++++--- 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f4c374ce5..ed292d6b8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -184,6 +184,47 @@ jobs: cmake -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx .. cmake --build . --config Release -j $(nproc) + ubuntu-22-cmake-sycl-fp16: + runs-on: ubuntu-22.04 + + continue-on-error: true + + steps: + - uses: actions/checkout@v2 + + - name: add oneAPI to apt + shell: bash + run: | + cd /tmp + wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" + + - name: install oneAPI dpcpp compiler + shell: bash + run: | + sudo apt update + sudo apt install intel-oneapi-compiler-dpcpp-cpp + + - name: install oneAPI MKL library + shell: bash + run: | + sudo apt install intel-oneapi-mkl-devel + + - name: Clone + id: checkout + uses: actions/checkout@v3 + + - name: Build + id: cmake_build + run: | + source /opt/intel/oneapi/setvars.sh + mkdir build + cd build + cmake -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON .. + cmake --build . --config Release -j $(nproc) + # TODO: build with LLAMA_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know # how to debug it. 
# ref: https://github.com/ggerganov/llama.cpp/actions/runs/7131777249/job/19420981052#step:5:1124 diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index a03df4c65..dd562a898 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -12148,7 +12148,8 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec( const int64_t src1_ncols, const int64_t src1_padded_row_size, const dpct::queue_ptr &stream) { - const int64_t ne00 = src0->ne[0]; + GGML_TENSOR_BINARY_OP_LOCALS + const int64_t row_diff = row_high - row_low; // on some GPUs it is faster to convert src1 to half and to use half precision intrinsics @@ -12167,8 +12168,9 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec( } else { src1_dfloat = src1_dfloat_a.alloc(ne00); ggml_cpy_f32_f16_sycl((const char *)src1_ddf_i, (char *)src1_dfloat, - ne00, ne00, 1, sizeof(float), 0, 0, ne00, 1, - sizeof(sycl::half), 0, 0, stream); + ne00, ne00, ne01, ne02, nb00, nb01, nb02, + nb03, ne10, ne11, ne12, nb10, nb11, nb12, + nb13, stream); } } #else From 41f308f58edc2a04bcf9e245100b0a9b10e9a0fb Mon Sep 17 00:00:00 2001 From: slaren Date: Thu, 8 Feb 2024 21:33:03 +0100 Subject: [PATCH 64/94] llama : do not print "offloading layers" message in CPU-only builds (#5416) --- llama.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llama.cpp b/llama.cpp index 89acafbc3..db7d1c1cd 100644 --- a/llama.cpp +++ b/llama.cpp @@ -4209,8 +4209,7 @@ static bool llm_load_tensors( ctx_bufs.emplace_back(ctx, buf); } - // print memory requirements - { + if (llama_supports_gpu_offload()) { const int n_gpu = std::min(n_gpu_layers, int(hparams.n_layer)); LLAMA_LOG_INFO("%s: offloading %d repeating layers to GPU\n", __func__, n_gpu); @@ -4222,10 +4221,11 @@ static bool llm_load_tensors( const int max_offloadable_layers = hparams.n_layer + 1; LLAMA_LOG_INFO("%s: offloaded %d/%d layers to GPU\n", __func__, std::min(n_gpu_layers, max_offloadable_layers), max_backend_supported_layers); + } - for (ggml_backend_buffer_t buf : model.bufs) { - 
LLAMA_LOG_INFO("%s: %10s buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf), ggml_backend_buffer_get_size(buf) / 1024.0 / 1024.0); - } + // print memory requirements + for (ggml_backend_buffer_t buf : model.bufs) { + LLAMA_LOG_INFO("%s: %10s buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf), ggml_backend_buffer_get_size(buf) / 1024.0 / 1024.0); } // populate tensors_by_name From 8e6a9d2de0096af7120606c74ee2f26684e87b41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Thu, 8 Feb 2024 21:56:40 +0100 Subject: [PATCH 65/94] CUDA: more warps for mmvq on NVIDIA (#5394) --- ggml-cuda.cu | 133 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 86 insertions(+), 47 deletions(-) diff --git a/ggml-cuda.cu b/ggml-cuda.cu index db9da2459..5053757e6 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -5310,22 +5310,26 @@ template static __global__ void #endif // __CUDA_ARCH__ >= CC_VOLTA } -template +#define MMVQ_NWARPS_NVIDIA 4 +#define MMVQ_NWARPS_AMD_RDNA2 1 +#define MMVQ_NWARPS_AMD_OLD 4 + +template +#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) +__launch_bounds__(nwarps*WARP_SIZE, 1) // tells the compiler to use as many registers as it wants +#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) static __global__ void mul_mat_vec_q( const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst, const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y_par, const int nrows_dst) { const int ncols_y = ncols_y_template != 0 ? 
ncols_y_template : ncols_y_par; - const int row = blockIdx.x*blockDim.y + threadIdx.y; - - if (row >= nrows_x) { - return; - } + const int tid = WARP_SIZE*threadIdx.y + threadIdx.x; + const int row = blockIdx.x; const int blocks_per_row_x = ncols_x / qk; const int blocks_per_col_y = nrows_y / QK8_1; - const int blocks_per_warp = vdr * WARP_SIZE / qi; + const int blocks_per_iter = vdr * nwarps*WARP_SIZE / qi; // partial sum for each thread float tmp[ncols_y_template != 0 ? ncols_y_template : 8] = {0.0f}; @@ -5333,12 +5337,12 @@ static __global__ void mul_mat_vec_q( const block_q_t * x = (const block_q_t *) vx; const block_q8_1 * y = (const block_q8_1 *) vy; - for (int i = threadIdx.x / (qi/vdr); i < blocks_per_row_x; i += blocks_per_warp) { + for (int i = tid / (qi/vdr); i < blocks_per_row_x; i += blocks_per_iter) { const int ibx = row*blocks_per_row_x + i; // x block index const int iby = i * (qk/QK8_1); // y block index that aligns with ibx - const int iqs = vdr * (threadIdx.x % (qi/vdr)); // x block quant index when casting the quants to int + const int iqs = vdr * (tid % (qi/vdr)); // x block quant index when casting the quants to int #pragma unroll for (int j = 0; j < ncols_y; ++j) { @@ -5346,9 +5350,25 @@ static __global__ void mul_mat_vec_q( } } + __shared__ float tmp_shared[nwarps-1 > 0 ? nwarps-1 : 1][ncols_y_template != 0 ? 
ncols_y_template : 8][WARP_SIZE]; + if (threadIdx.y > 0) { +#pragma unroll + for (int j = 0; j < ncols_y; ++j) { + tmp_shared[threadIdx.y-1][j][threadIdx.x] = tmp[j]; + } + } + __syncthreads(); + if (threadIdx.y > 0) { + return; + } + // sum up partial sums and write back result #pragma unroll for (int j = 0; j < ncols_y; ++j) { +#pragma unroll + for (int i = 0; i < nwarps-1; ++i) { + tmp[j] += tmp_shared[i][j][threadIdx.x]; + } tmp[j] = warp_reduce_sum(tmp[j]); if (threadIdx.x == 0) { @@ -6833,46 +6853,65 @@ static void mul_mat_vec_q_cuda( GGML_ASSERT(ncols_x % qk == 0); GGML_ASSERT(ncols_y <= 4); - const int block_num_y = (nrows_x + GGML_CUDA_MMV_Y - 1) / GGML_CUDA_MMV_Y; - const dim3 block_nums(block_num_y, 1, 1); - const dim3 block_dims(WARP_SIZE, GGML_CUDA_MMV_Y, 1); - switch (ncols_y) { - case 1: - mul_mat_vec_q<1, qk, qi, block_q_t, vdr, vec_dot> - <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); - break; - case 2: - mul_mat_vec_q<2, qk, qi, block_q_t, vdr, vec_dot> - <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); - break; - case 3: - mul_mat_vec_q<3, qk, qi, block_q_t, vdr, vec_dot> - <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); - break; - case 4: - mul_mat_vec_q<4, qk, qi, block_q_t, vdr, vec_dot> - <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); - break; - // case 5: - // mul_mat_vec_q<5, qk, qi, block_q_t, vdr, vec_dot> - // <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); - // break; - // case 6: - // mul_mat_vec_q<6, qk, qi, block_q_t, vdr, vec_dot> - // <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); - // break; - // case 7: - // mul_mat_vec_q<7, qk, qi, block_q_t, vdr, vec_dot> - // <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); - // break; - // case 8: - // mul_mat_vec_q<8, qk, qi, block_q_t, vdr, vec_dot> - // <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); - // break; + int id; + 
CUDA_CHECK(cudaGetDevice(&id)); + + int nwarps; + if (g_device_caps[id].cc >= CC_OFFSET_AMD) { + nwarps = g_device_caps[id].cc >= CC_RDNA2 ? MMVQ_NWARPS_AMD_RDNA2 : MMVQ_NWARPS_AMD_OLD; + } else { + nwarps = MMVQ_NWARPS_NVIDIA; + } + + const dim3 block_nums(nrows_x, 1, 1); + const dim3 block_dims(WARP_SIZE, nwarps, 1); + + switch (nwarps) { + case 1: switch(ncols_y) { + case 1: + mul_mat_vec_q<1, 1, qk, qi, block_q_t, vdr, vec_dot> + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); + break; + case 2: + mul_mat_vec_q<1, 2, qk, qi, block_q_t, vdr, vec_dot> + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); + break; + case 3: + mul_mat_vec_q<1, 3, qk, qi, block_q_t, vdr, vec_dot> + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); + break; + case 4: + mul_mat_vec_q<1, 4, qk, qi, block_q_t, vdr, vec_dot> + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); + break; + default: + GGML_ASSERT(false); + break; + } break; + case 4: switch(ncols_y) { + case 1: + mul_mat_vec_q<4, 1, qk, qi, block_q_t, vdr, vec_dot> + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); + break; + case 2: + mul_mat_vec_q<4, 2, qk, qi, block_q_t, vdr, vec_dot> + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); + break; + case 3: + mul_mat_vec_q<4, 3, qk, qi, block_q_t, vdr, vec_dot> + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); + break; + case 4: + mul_mat_vec_q<4, 4, qk, qi, block_q_t, vdr, vec_dot> + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); + break; + default: + GGML_ASSERT(false); + break; + } break; + default: GGML_ASSERT(false); - // mul_mat_vec_q<0, qk, qi, block_q_t, vdr, vec_dot> - // <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); break; } } From 44fbe34360dd760f9e68b4271f21533436397f84 Mon Sep 17 00:00:00 2001 From: 0cc4m Date: Fri, 9 Feb 2024 06:52:33 +0100 Subject: [PATCH 66/94] Fix Vulkan crash on APUs with very little 
device memory (#5424) * Fix Vulkan crash on APUs with very little device memory * Fix debug output function names --- ggml-vulkan.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp index 9e2846ee4..254f648a6 100644 --- a/ggml-vulkan.cpp +++ b/ggml-vulkan.cpp @@ -744,6 +744,8 @@ static vk_buffer ggml_vk_create_buffer(ggml_backend_vk_context * ctx, size_t siz } if (memory_type_index >= mem_props.memoryTypeCount) { + ctx->device.lock()->device.destroyBuffer(buf->buffer); + buf->size = 0; throw vk::OutOfDeviceMemoryError("No suitable memory type found"); } @@ -3875,7 +3877,7 @@ static ggml_tensor * ggml_vk_find_last_use(const ggml_tensor * node, ggml_cgraph static void ggml_vk_preallocate_buffers_graph(ggml_backend_vk_context * ctx, ggml_tensor * node){ #ifdef GGML_VULKAN_DEBUG - std::cerr << "ggml_ctx->preallocate_buffers_graph(" << node << ")" << std::endl; + std::cerr << "ggml_vk_preallocate_buffers_graph(" << node << ")" << std::endl; #endif const bool any_on_device = node->backend == GGML_BACKEND_GPU || (node->src[0] != nullptr && (node->src[0]->backend == GGML_BACKEND_GPU || node->src[0]->backend == GGML_BACKEND_GPU_SPLIT)) @@ -3994,8 +3996,7 @@ static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) { return; } #ifdef GGML_VULKAN_DEBUG - std::cerr << "ggml_ctx->preallocate_buffers()" << std::endl; - std::cerr << "qx_size: " << ctx->prealloc_size_qx << " qy_size: " << ctx->prealloc_size_qy << " x_size: " << ctx->prealloc_size_x << " y_size: " << ctx->prealloc_size_y << " split_k_size: " << ctx->prealloc_size_split_k << std::endl; + std::cerr << "ggml_vk_preallocate_buffers(qx_size: " << ctx->prealloc_size_qx << " qy_size: " << ctx->prealloc_size_qy << " x_size: " << ctx->prealloc_size_x << " y_size: " << ctx->prealloc_size_y << " split_k_size: " << ctx->prealloc_size_split_k << ")" << std::endl; #endif #if defined(GGML_VULKAN_RUN_TESTS) ctx->staging = ggml_vk_create_buffer_check(ctx, 100ul 
* 1024ul * 1024ul, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached); From b2f87cb64db47d799b6f3656855c9caf9792ab2a Mon Sep 17 00:00:00 2001 From: Michael Podvitskiy Date: Fri, 9 Feb 2024 10:56:43 +0100 Subject: [PATCH 67/94] ggml : fix `error C2078: too many initializers` for MSVC ARM64 (#5404) --- ggml-quants.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/ggml-quants.c b/ggml-quants.c index 101d3e783..1031e3761 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -268,6 +268,17 @@ static inline float hsum_float_4x4(const __m128 a, const __m128 b, const __m128 #endif // defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) #if defined(__ARM_NEON) + +#ifdef _MSC_VER + +#define ggml_vld1q_u32(w,x,y,z) { ((w) + ((uint64_t)(x) << 32)), ((y) + ((uint64_t)(z) << 32)) } + +#else + +#define ggml_vld1q_u32(w,x,y,z) { (w), (x), (y), (z) } + +#endif + #if !defined(__aarch64__) // 64-bit compatibility @@ -8698,10 +8709,10 @@ void ggml_vec_dot_iq3_xxs_q8_K(const int n, float * restrict s, const void * res for (int ib32 = 0; ib32 < QK_K/32; ib32 += 2) { q8b = ggml_vld1q_s8_x4(q8); q8 += 64; memcpy(aux32, gas, 2*sizeof(uint32_t)); gas += 2*sizeof(uint32_t); - const uint32x4_t aux32x4_0 = {iq3xxs_grid[q3[ 0]], iq3xxs_grid[q3[ 1]], iq3xxs_grid[q3[ 2]], iq3xxs_grid[q3[ 3]]}; - const uint32x4_t aux32x4_1 = {iq3xxs_grid[q3[ 4]], iq3xxs_grid[q3[ 5]], iq3xxs_grid[q3[ 6]], iq3xxs_grid[q3[ 7]]}; - const uint32x4_t aux32x4_2 = {iq3xxs_grid[q3[ 8]], iq3xxs_grid[q3[ 9]], iq3xxs_grid[q3[10]], iq3xxs_grid[q3[11]]}; - const uint32x4_t aux32x4_3 = {iq3xxs_grid[q3[12]], iq3xxs_grid[q3[13]], iq3xxs_grid[q3[14]], iq3xxs_grid[q3[15]]}; + const uint32x4_t aux32x4_0 = ggml_vld1q_u32(iq3xxs_grid[q3[ 0]], iq3xxs_grid[q3[ 1]], iq3xxs_grid[q3[ 2]], iq3xxs_grid[q3[ 3]]); + const uint32x4_t aux32x4_1 = ggml_vld1q_u32(iq3xxs_grid[q3[ 4]], iq3xxs_grid[q3[ 5]], 
iq3xxs_grid[q3[ 6]], iq3xxs_grid[q3[ 7]]); + const uint32x4_t aux32x4_2 = ggml_vld1q_u32(iq3xxs_grid[q3[ 8]], iq3xxs_grid[q3[ 9]], iq3xxs_grid[q3[10]], iq3xxs_grid[q3[11]]); + const uint32x4_t aux32x4_3 = ggml_vld1q_u32(iq3xxs_grid[q3[12]], iq3xxs_grid[q3[13]], iq3xxs_grid[q3[14]], iq3xxs_grid[q3[15]]); q3 += 16; q3s.val[0] = vcombine_s8(vld1_s8((const void *)(signs64 + ((aux32[0] >> 0) & 127))), vld1_s8((const void *)(signs64 + ((aux32[0] >> 7) & 127)))); q3s.val[1] = vcombine_s8(vld1_s8((const void *)(signs64 + ((aux32[0] >> 14) & 127))), vld1_s8((const void *)(signs64 + ((aux32[0] >> 21) & 127)))); From e4124c24775f2cb5b3d7acc93bf9dc5471c172ef Mon Sep 17 00:00:00 2001 From: Marko Tasic Date: Fri, 9 Feb 2024 11:17:00 +0100 Subject: [PATCH 68/94] readme : add JavaScript/Wasm repo (#5415) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 66166c01b..0b4efdd33 100644 --- a/README.md +++ b/README.md @@ -124,6 +124,7 @@ Typically finetunes of the base models below are supported as well. 
- Go: [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp) - Node.js: [withcatai/node-llama-cpp](https://github.com/withcatai/node-llama-cpp) - JS/TS (llama.cpp server client): [lgrammel/modelfusion](https://modelfusion.dev/integration/model-provider/llamacpp) +- JavaScript/Wasm (works in browser): [tangledgroup/llama-cpp-wasm](https://github.com/tangledgroup/llama-cpp-wasm) - Ruby: [yoshoku/llama_cpp.rb](https://github.com/yoshoku/llama_cpp.rb) - Rust (nicer API): [mdrokz/rust-llama.cpp](https://github.com/mdrokz/rust-llama.cpp) - Rust (more direct bindings): [utilityai/llama-cpp-rs](https://github.com/utilityai/llama-cpp-rs) From e5ca3937c685d6e012ac4db40555d6ec100ff03c Mon Sep 17 00:00:00 2001 From: Paul Tsochantaris Date: Fri, 9 Feb 2024 10:48:06 +0000 Subject: [PATCH 69/94] llama : do not cap thread count when MoE on CPU (#5419) * Not capping thread count when MoE inference is running on CPU * Whitespace --- llama.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llama.cpp b/llama.cpp index db7d1c1cd..0566b087b 100644 --- a/llama.cpp +++ b/llama.cpp @@ -7285,7 +7285,9 @@ static int llama_decode_internal( // TODO: this is mostly important for Apple Silicon where CBLAS is still performing very well // we still need some threads to process all non-mul_mat ops, but not too much to avoid interfering // with the BLAS calls. need a better solution - if (n_tokens >= 32 && ggml_cpu_has_blas() && !ggml_cpu_has_gpublas()) { + // MoE Special Case: This logic applies when hparams.n_expert == 0, i.e. the model is NOT an MoE model. When an MoE is + // being processed then Accelerate/BLAS will not be involved, so capping would limit performance. 
+ if (n_tokens >= 32 && hparams.n_expert == 0 && ggml_cpu_has_blas() && !ggml_cpu_has_gpublas()) { n_threads = std::min(4, n_threads); } From 7c777fcd5dd4af7079e33390cf6a19c328a2666f Mon Sep 17 00:00:00 2001 From: Riley Stewart Date: Fri, 9 Feb 2024 02:49:49 -0800 Subject: [PATCH 70/94] server : fix prompt caching for repeated prompts (#5420) --- examples/server/server.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index eceda30d0..8d668f798 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1592,10 +1592,6 @@ struct llama_server_context LOG_TEE("slot %d : in cache: %i tokens | to process: %i tokens\n", slot.id, slot.n_past, slot.num_prompt_tokens_processed); } - LOG_TEE("slot %d : kv cache rm - [%d, end)\n", slot.id, (int) system_tokens.size() + slot.n_past); - - llama_kv_cache_seq_rm(ctx, slot.id, system_tokens.size() + slot.n_past, -1); - slot.cache_tokens = prompt_tokens; if (slot.n_past == slot.num_prompt_tokens && slot.n_past > 0) @@ -1609,6 +1605,10 @@ struct llama_server_context } } + LOG_TEE("slot %d : kv cache rm - [%d, end)\n", slot.id, (int) system_tokens.size() + slot.n_past); + + llama_kv_cache_seq_rm(ctx, slot.id, system_tokens.size() + slot.n_past, -1); + LOG_VERBOSE("prompt ingested", { {"n_past", slot.n_past}, {"cached", tokens_to_str(ctx, slot.cache_tokens.cbegin(), slot.cache_tokens.cbegin() + slot.n_past)}, From e00d2a62dd1441e3b089570ec06d05c18800d368 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Fri, 9 Feb 2024 14:00:59 +0100 Subject: [PATCH 71/94] llava : add requirements.txt and update README.md (#5428) * llava: add requirements.txt and update README.md This commit adds a `requirements.txt` file to the `examples/llava` directory. This file contains the required Python packages to run the scripts in the `examples/llava` directory. The motivation for this is to make it easier for users to run the scripts in `examples/llava`. 
This spares users from running into missing-package errors when the required packages are not installed on their system. Signed-off-by: Daniel Bevenius * llava: fix typo in llava-surgery.py output Signed-off-by: Daniel Bevenius --------- Signed-off-by: Daniel Bevenius --- examples/llava/README.md | 12 +++++++++--- examples/llava/llava-surgery.py | 2 +- examples/llava/requirements.txt | 3 +++ 3 files changed, 13 insertions(+), 4 deletions(-) create mode 100644 examples/llava/requirements.txt diff --git a/examples/llava/README.md b/examples/llava/README.md index 721d5e613..19f1a50a2 100644 --- a/examples/llava/README.md +++ b/examples/llava/README.md @@ -29,19 +29,25 @@ git clone https://huggingface.co/liuhaotian/llava-v1.5-7b git clone https://huggingface.co/openai/clip-vit-large-patch14-336 ``` -2. Use `llava-surgery.py` to split the LLaVA model to LLaMA and multimodel projector constituents: +2. Install the required Python packages: + +```sh +pip install -r examples/llava/requirements.txt +``` + +3. Use `llava-surgery.py` to split the LLaVA model to LLaMA and multimodel projector constituents: ```sh python ./examples/llava/llava-surgery.py -m ../llava-v1.5-7b ``` -3. Use `convert-image-encoder-to-gguf.py` to convert the LLaVA image encoder to GGUF: +4. Use `convert-image-encoder-to-gguf.py` to convert the LLaVA image encoder to GGUF: ```sh python ./examples/llava/convert-image-encoder-to-gguf.py -m ../clip-vit-large-patch14-336 --llava-projector ../llava-v1.5-7b/llava.projector --output-dir ../llava-v1.5-7b ``` -4. Use `convert.py` to convert the LLaMA part of LLaVA to GGUF: +5. 
Use `convert.py` to convert the LLaMA part of LLaVA to GGUF: ```sh python ./convert.py ../llava-v1.5-7b diff --git a/examples/llava/llava-surgery.py b/examples/llava/llava-surgery.py index 515f6b58d..0a61efdfe 100644 --- a/examples/llava/llava-surgery.py +++ b/examples/llava/llava-surgery.py @@ -42,5 +42,5 @@ if len(clip_tensors) > 0: torch.save(checkpoint, path) print("Done!") -print(f"Now you can convert {args.model} to a a regular LLaMA GGUF file.") +print(f"Now you can convert {args.model} to a regular LLaMA GGUF file.") print(f"Also, use {args.model}/llava.projector to prepare a llava-encoder.gguf file.") diff --git a/examples/llava/requirements.txt b/examples/llava/requirements.txt new file mode 100644 index 000000000..f80f727a7 --- /dev/null +++ b/examples/llava/requirements.txt @@ -0,0 +1,3 @@ +-r ../../requirements/requirements-convert.txt +pillow~=10.2.0 +torch~=2.1.1 From 4b7b38bef5addbd31f453871d79647fbae6bec8a Mon Sep 17 00:00:00 2001 From: Neuman Vong Date: Sat, 10 Feb 2024 05:30:19 +1100 Subject: [PATCH 72/94] vulkan: Set limit for task concurrency (#5427) A common default for the maximum number of open files is 256, which can lead to `asyncio.gather(*tasks)` failing with Too many open files. $ python ggml_vk_generate_shaders.py --glslc=$ANDROID_NDK_PATH/shader-tools/darwin-x86_64/glslc ggml_vulkan: Generating and compiling shaders to SPIR-V Traceback (most recent call last): File "/Users/neuman/Code.noindex/github/llama.cpp/ggml_vk_generate_shaders.py", line 2326, in asyncio.run(main()) File "/Users/neuman/Code.noindex/miniforge3/lib/python3.10/asyncio/runners.py", line 44, in run return loop.run_until_complete(main) File "/Users/neuman/Code.noindex/miniforge3/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete return future.result() File "/Users/neuman/Code.noindex/github/llama.cpp/ggml_vk_generate_shaders.py", line 2294, in main await asyncio.gather(*tasks) [...snip...] 
OSError: [Errno 24] Too many open files This change sets a reasonable concurrency limit for tasks (and therefore open files), without significant impact on run time. --- ggml_vk_generate_shaders.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/ggml_vk_generate_shaders.py b/ggml_vk_generate_shaders.py index 4abb0383f..b2e86e182 100644 --- a/ggml_vk_generate_shaders.py +++ b/ggml_vk_generate_shaders.py @@ -2067,6 +2067,8 @@ type_names = { K_QUANTS_PER_ITERATION = 2 +ASYNCIO_CONCURRENCY = 64 + output_dir = gettempdir() lock = asyncio.Lock() @@ -2291,7 +2293,14 @@ async def main(): tasks.append(string_to_spv("rope_neox_f32", rope_neox_src, {"A_TYPE": "float", "D_TYPE": "float"})) tasks.append(string_to_spv("rope_neox_f16", rope_neox_src, {"A_TYPE": "float16_t", "D_TYPE": "float16_t"})) - await asyncio.gather(*tasks) + # Helper to decorate tasks with semaphore acquisition. + async def withSemaphore(sem, task): + async with sem: + return await task + + # Run tasks concurrently guarded by a concurrency limit. 
+ sem = asyncio.Semaphore(ASYNCIO_CONCURRENCY) + await asyncio.gather(*(withSemaphore(sem, task) for task in tasks)) with open("ggml-vulkan-shaders.hpp", "w") as f: f.write("#include \n\n") From 4633d93af08d890ecd00fa6e4f61d76f21cded4c Mon Sep 17 00:00:00 2001 From: Michael Podvitskiy Date: Fri, 9 Feb 2024 10:42:27 +0100 Subject: [PATCH 73/94] ggml : add abort_callback for cpu backend (ggml/725) * a way to use abort_callback with the cpu backend * whisper update --- ggml-backend.c | 26 ++++++++++++++++++++++---- ggml-backend.h | 5 +++-- ggml.c | 2 +- ggml.h | 9 +++++++-- 4 files changed, 33 insertions(+), 9 deletions(-) diff --git a/ggml-backend.c b/ggml-backend.c index 0764dfebc..532da8eda 100644 --- a/ggml-backend.c +++ b/ggml-backend.c @@ -653,6 +653,9 @@ struct ggml_backend_cpu_context { int n_threads; void * work_data; size_t work_size; + + ggml_abort_callback abort_callback; + void * abort_callback_data; }; GGML_CALL static const char * ggml_backend_cpu_name(ggml_backend_t backend) { @@ -691,6 +694,9 @@ GGML_CALL static ggml_backend_graph_plan_t ggml_backend_cpu_graph_plan_create(gg cpu_plan->cplan.work_data = malloc(cpu_plan->cplan.work_size); } + cpu_plan->cplan.abort_callback = cpu_ctx->abort_callback; + cpu_plan->cplan.abort_callback_data = cpu_ctx->abort_callback_data; + return cpu_plan; } @@ -721,9 +727,11 @@ GGML_CALL static bool ggml_backend_cpu_graph_compute(ggml_backend_t backend, str cpu_ctx->work_data = realloc(cpu_ctx->work_data, cplan.work_size); cpu_ctx->work_size = cplan.work_size; } - cplan.work_data = cpu_ctx->work_data; + cplan.abort_callback = cpu_ctx->abort_callback; + cplan.abort_callback_data = cpu_ctx->abort_callback_data; + ggml_graph_compute(cgraph, &cplan); return true; } @@ -759,9 +767,11 @@ static struct ggml_backend_i cpu_backend_i = { ggml_backend_t ggml_backend_cpu_init(void) { struct ggml_backend_cpu_context * ctx = malloc(sizeof(struct ggml_backend_cpu_context)); - ctx->n_threads = GGML_DEFAULT_N_THREADS; - ctx->work_data = 
NULL; - ctx->work_size = 0; + ctx->n_threads = GGML_DEFAULT_N_THREADS; + ctx->work_data = NULL; + ctx->work_size = 0; + ctx->abort_callback = NULL; + ctx->abort_callback_data = NULL; ggml_backend_t cpu_backend = malloc(sizeof(struct ggml_backend)); @@ -783,6 +793,14 @@ void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads) { ctx->n_threads = n_threads; } +void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data) { + GGML_ASSERT(ggml_backend_is_cpu(backend_cpu)); + + struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context; + ctx->abort_callback = abort_callback; + ctx->abort_callback_data = abort_callback_data; +} + GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size) { return ggml_backend_buffer_init(ggml_backend_cpu_buffer_type(), cpu_backend_buffer_i_from_ptr, ptr, size); } diff --git a/ggml-backend.h b/ggml-backend.h index 8b8160fcf..282b3a9b7 100644 --- a/ggml-backend.h +++ b/ggml-backend.h @@ -83,8 +83,9 @@ extern "C" { GGML_API ggml_backend_t ggml_backend_cpu_init(void); - GGML_API GGML_CALL bool ggml_backend_is_cpu (ggml_backend_t backend); - GGML_API void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads); + GGML_API GGML_CALL bool ggml_backend_is_cpu (ggml_backend_t backend); + GGML_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads); + GGML_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data); // Create a backend buffer from an existing pointer GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size); diff --git a/ggml.c b/ggml.c index f783a6fd3..86cd65862 100644 --- a/ggml.c +++ b/ggml.c @@ -16649,7 +16649,7 @@ struct ggml_compute_state_shared { atomic_int node_n; // active graph node atomic_int 
node_task; // active graph node task phase - bool (*abort_callback)(void * data); // abort ggml_graph_compute when true + ggml_abort_callback abort_callback; // abort ggml_graph_compute when true void * abort_callback_data; }; diff --git a/ggml.h b/ggml.h index e0a4799f3..1360cd8ee 100644 --- a/ggml.h +++ b/ggml.h @@ -567,6 +567,11 @@ extern "C" { static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor); + // Abort callback + // If not NULL, called before ggml computation + // If it returns true, the computation is aborted + typedef bool (*ggml_abort_callback)(void * data); + // the compute plan that needs to be prepared for ggml_graph_compute() // since https://github.com/ggerganov/ggml/issues/287 struct ggml_cplan { @@ -576,8 +581,8 @@ extern "C" { int n_threads; // abort ggml_graph_compute when true - bool (*abort_callback)(void * data); - void * abort_callback_data; + ggml_abort_callback abort_callback; + void * abort_callback_data; }; enum ggml_cgraph_eval_order { From 43b65f5eb85e8741aba573a8f65bb8efad245d31 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sat, 10 Feb 2024 09:30:36 +0200 Subject: [PATCH 74/94] sync : ggml --- scripts/sync-ggml.last | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/sync-ggml.last b/scripts/sync-ggml.last index 7b6c17915..6ae75bc31 100644 --- a/scripts/sync-ggml.last +++ b/scripts/sync-ggml.last @@ -1 +1 @@ -475cbad5c1c834e31e26a2283bc1413181644360 +2c7cf49810d523b9632da393a9e8270b60bf3b24 From cd9aea63b577a83def84dbd6dcd90a6fa02af745 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sat, 10 Feb 2024 09:53:05 +0200 Subject: [PATCH 75/94] scripts : update sync scripts with new backends --- scripts/sync-ggml-am.sh | 12 ++++++++++++ scripts/sync-ggml.sh | 6 ++++++ 2 files changed, 18 insertions(+) diff --git a/scripts/sync-ggml-am.sh b/scripts/sync-ggml-am.sh index 6b2514a11..2c391e641 100755 --- a/scripts/sync-ggml-am.sh +++ b/scripts/sync-ggml-am.sh @@ -97,6 +97,8 @@ if [ -f 
$SRC_LLAMA/ggml-src.patch ]; then # src/ggml-cuda.cu -> ggml-cuda.cu # src/ggml-cuda.h -> ggml-cuda.h # src/ggml-impl.h -> ggml-impl.h + # src/ggml-kompute.cpp -> ggml-kompute.cpp + # src/ggml-kompute.h -> ggml-kompute.h # src/ggml-metal.h -> ggml-metal.h # src/ggml-metal.m -> ggml-metal.m # src/ggml-mpi.h -> ggml-mpi.h @@ -105,6 +107,10 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then # src/ggml-opencl.h -> ggml-opencl.h # src/ggml-quants.c -> ggml-quants.c # src/ggml-quants.h -> ggml-quants.h + # src/ggml-sycl.cpp -> ggml-sycl.cpp + # src/ggml-sycl.h -> ggml-sycl.h + # src/ggml-vulkan.cpp -> ggml-vulkan.cpp + # src/ggml-vulkan.h -> ggml-vulkan.h # include/ggml/ggml.h -> ggml.h # include/ggml/ggml-alloc.h -> ggml-alloc.h # include/ggml/ggml-backend.h -> ggml-backend.h @@ -123,6 +129,8 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then -e 's/src\/ggml-cuda\.cu/ggml-cuda.cu/g' \ -e 's/src\/ggml-cuda\.h/ggml-cuda.h/g' \ -e 's/src\/ggml-impl\.h/ggml-impl.h/g' \ + -e 's/src\/ggml-kompute\.cpp/ggml-kompute.cpp/g' \ + -e 's/src\/ggml-kompute\.h/ggml-kompute.h/g' \ -e 's/src\/ggml-metal\.h/ggml-metal.h/g' \ -e 's/src\/ggml-metal\.m/ggml-metal.m/g' \ -e 's/src\/ggml-mpi\.h/ggml-mpi.h/g' \ @@ -131,6 +139,10 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then -e 's/src\/ggml-opencl\.h/ggml-opencl.h/g' \ -e 's/src\/ggml-quants\.c/ggml-quants.c/g' \ -e 's/src\/ggml-quants\.h/ggml-quants.h/g' \ + -e 's/src\/ggml-sycl\.cpp/ggml-sycl.cpp/g' \ + -e 's/src\/ggml-sycl\.h/ggml-sycl.h/g' \ + -e 's/src\/ggml-vulkan\.cpp/ggml-vulkan.cpp/g' \ + -e 's/src\/ggml-vulkan\.h/ggml-vulkan.h/g' \ -e 's/include\/ggml\/ggml\.h/ggml.h/g' \ -e 's/include\/ggml\/ggml-alloc\.h/ggml-alloc.h/g' \ -e 's/include\/ggml\/ggml-backend\.h/ggml-backend.h/g' \ diff --git a/scripts/sync-ggml.sh b/scripts/sync-ggml.sh index 0097db435..feb34bbc8 100755 --- a/scripts/sync-ggml.sh +++ b/scripts/sync-ggml.sh @@ -7,6 +7,8 @@ cp -rpv ../ggml/src/ggml-backend.c ./ggml-backend.c cp -rpv ../ggml/src/ggml-cuda.cu ./ggml-cuda.cu cp -rpv 
../ggml/src/ggml-cuda.h ./ggml-cuda.h cp -rpv ../ggml/src/ggml-impl.h ./ggml-impl.h +cp -rpv ../ggml/src/ggml-kompute.cpp ./ggml-kompute.cpp +cp -rpv ../ggml/src/ggml-kompute.h ./ggml-kompute.h cp -rpv ../ggml/src/ggml-metal.h ./ggml-metal.h cp -rpv ../ggml/src/ggml-metal.m ./ggml-metal.m cp -rpv ../ggml/src/ggml-metal.metal ./ggml-metal.metal @@ -16,6 +18,10 @@ cp -rpv ../ggml/src/ggml-opencl.cpp ./ggml-opencl.cpp cp -rpv ../ggml/src/ggml-opencl.h ./ggml-opencl.h cp -rpv ../ggml/src/ggml-quants.c ./ggml-quants.c cp -rpv ../ggml/src/ggml-quants.h ./ggml-quants.h +cp -rpv ../ggml/src/ggml-sycl.cpp ./ggml-sycl.cpp +cp -rpv ../ggml/src/ggml-sycl.h ./ggml-sycl.h +cp -rpv ../ggml/src/ggml-vulkan.cpp ./ggml-vulkan.cpp +cp -rpv ../ggml/src/ggml-vulkan.h ./ggml-vulkan.h cp -rpv ../ggml/include/ggml/ggml.h ./ggml.h cp -rpv ../ggml/include/ggml/ggml-alloc.h ./ggml-alloc.h cp -rpv ../ggml/include/ggml/ggml-backend.h ./ggml-backend.h From f026f8120f97090d34a52b3dc023c82e0ede3f7d Mon Sep 17 00:00:00 2001 From: Ian Bull Date: Sat, 10 Feb 2024 02:53:28 -0800 Subject: [PATCH 76/94] metal : use autoreleasepool to avoid memory leaks (#5437) There appears to be a known memory leak when using the `MTLCommandBuffer`. It is suggested to use `@autoreleasepool` in [1,2] [1] https://developer.apple.com/forums/thread/662721 [2] https://forums.developer.apple.com/forums/thread/120931 This change-set wraps the `ggml_metal_graph_compute` in a `@autoreleasepool`. 
This commit addresses https://github.com/ggerganov/llama.cpp/issues/5436 --- ggml-metal.m | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ggml-metal.m b/ggml-metal.m index 5260ed827..c1d8e2de8 100644 --- a/ggml-metal.m +++ b/ggml-metal.m @@ -687,6 +687,7 @@ static bool ggml_metal_graph_compute( struct ggml_metal_context * ctx, struct ggml_cgraph * gf) { + @autoreleasepool { MTLComputePassDescriptor * edesc = MTLComputePassDescriptor.computePassDescriptor; edesc.dispatchType = MTLDispatchTypeSerial; @@ -2272,6 +2273,7 @@ static bool ggml_metal_graph_compute( [[MTLCaptureManager sharedCaptureManager] stopCapture]; } + } return true; } From 907e08c1109f498b01036367804cff3082c44524 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Sun, 11 Feb 2024 11:16:22 +0100 Subject: [PATCH 77/94] server : add llama2 chat template (#5425) * server: add mistral chat template * server: fix typo * server: rename template mistral to llama2 * server: format_llama2: remove BOS * server: validate "--chat-template" argument * server: clean up using_chatml variable Co-authored-by: Jared Van Bortel --------- Co-authored-by: Jared Van Bortel --- examples/server/oai.hpp | 8 ++++++-- examples/server/server.cpp | 22 ++++++++++++++++++++-- examples/server/utils.hpp | 30 ++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 4 deletions(-) diff --git a/examples/server/oai.hpp b/examples/server/oai.hpp index 43410f803..2eca8a9fb 100644 --- a/examples/server/oai.hpp +++ b/examples/server/oai.hpp @@ -15,9 +15,13 @@ using json = nlohmann::json; inline static json oaicompat_completion_params_parse( - const json &body /* openai api json semantics */) + const json &body, /* openai api json semantics */ + const std::string &chat_template) { json llama_params; + std::string formatted_prompt = chat_template == "chatml" + ? format_chatml(body["messages"]) // OpenAI 'messages' to chatml (with <|im_start|>,...) 
+ : format_llama2(body["messages"]); // OpenAI 'messages' to llama2 (with [INST],...) llama_params["__oaicompat"] = true; @@ -30,7 +34,7 @@ inline static json oaicompat_completion_params_parse( // https://platform.openai.com/docs/api-reference/chat/create llama_sampling_params default_sparams; llama_params["model"] = json_value(body, "model", std::string("unknown")); - llama_params["prompt"] = format_chatml(body["messages"]); // OpenAI 'messages' to llama.cpp 'prompt' + llama_params["prompt"] = formatted_prompt; llama_params["cache_prompt"] = json_value(body, "cache_prompt", false); llama_params["temperature"] = json_value(body, "temperature", 0.0); llama_params["top_k"] = json_value(body, "top_k", default_sparams.top_k); diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 8d668f798..4d212f1f0 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -36,6 +36,7 @@ struct server_params std::string hostname = "127.0.0.1"; std::vector api_keys; std::string public_path = "examples/server/public"; + std::string chat_template = "chatml"; int32_t port = 8080; int32_t read_timeout = 600; int32_t write_timeout = 600; @@ -1859,6 +1860,8 @@ static void server_print_usage(const char *argv0, const gpt_params ¶ms, printf(" types: int, float, bool. 
example: --override-kv tokenizer.ggml.add_bos_token=bool:false\n"); printf(" -gan N, --grp-attn-n N set the group attention factor to extend context size through self-extend(default: 1=disabled), used together with group attention width `--grp-attn-w`"); printf(" -gaw N, --grp-attn-w N set the group attention width to extend context size through self-extend(default: 512), used together with group attention factor `--grp-attn-n`"); + printf(" --chat-template FORMAT_NAME"); + printf(" set chat template, possible valus is: llama2, chatml (default %s)", sparams.chat_template.c_str()); printf("\n"); } @@ -2290,6 +2293,21 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, log_set_target(stdout); LOG_INFO("logging to file is disabled.", {}); } + else if (arg == "--chat-template") + { + if (++i >= argc) + { + invalid_param = true; + break; + } + std::string value(argv[i]); + if (value != "chatml" && value != "llama2") { + fprintf(stderr, "error: chat template can be \"llama2\" or \"chatml\", but got: %s\n", value.c_str()); + invalid_param = true; + break; + } + sparams.chat_template = value; + } else if (arg == "--override-kv") { if (++i >= argc) { @@ -2743,13 +2761,13 @@ int main(int argc, char **argv) // TODO: add mount point without "/v1" prefix -- how? 
- svr.Post("/v1/chat/completions", [&llama, &validate_api_key](const httplib::Request &req, httplib::Response &res) + svr.Post("/v1/chat/completions", [&llama, &validate_api_key, &sparams](const httplib::Request &req, httplib::Response &res) { res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin")); if (!validate_api_key(req, res)) { return; } - json data = oaicompat_completion_params_parse(json::parse(req.body)); + json data = oaicompat_completion_params_parse(json::parse(req.body), sparams.chat_template); const int task_id = llama.queue_tasks.get_new_id(); llama.queue_results.add_waiting_task_id(task_id); diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index 70cce0721..548548962 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -167,6 +167,34 @@ static T json_value(const json &body, const std::string &key, const T &default_v : default_value; } +inline std::string format_llama2(std::vector messages) +{ + std::ostringstream output; + bool is_inside_turn = false; + + for (auto it = messages.begin(); it != messages.end(); ++it) { + if (!is_inside_turn) { + output << "[INST] "; + } + std::string role = json_value(*it, "role", std::string("user")); + std::string content = json_value(*it, "content", std::string("")); + if (role == "system") { + output << "<>\n" << content << "\n<>\n\n"; + is_inside_turn = true; + } else if (role == "user") { + output << content << " [/INST]"; + is_inside_turn = true; + } else { + output << " " << content << " "; + is_inside_turn = false; + } + } + + LOG_VERBOSE("format_llama2", {{"text", output.str()}}); + + return output.str(); +} + inline std::string format_chatml(std::vector messages) { std::ostringstream chatml_msgs; @@ -180,6 +208,8 @@ inline std::string format_chatml(std::vector messages) chatml_msgs << "<|im_start|>assistant" << '\n'; + LOG_VERBOSE("format_chatml", {{"text", chatml_msgs.str()}}); + return chatml_msgs.str(); } From 
e4640d8fdf56f14a6db3d092bcd3d2d315cb5d04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Sun, 11 Feb 2024 12:44:51 +0100 Subject: [PATCH 78/94] lookup: add print for drafting performance (#5450) --- examples/lookup/lookup.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/examples/lookup/lookup.cpp b/examples/lookup/lookup.cpp index d8de7dd38..18235b8a1 100644 --- a/examples/lookup/lookup.cpp +++ b/examples/lookup/lookup.cpp @@ -1,7 +1,9 @@ #include "common.h" +#include "ggml.h" #include "llama.h" #include +#include #include #include #include @@ -73,6 +75,8 @@ int main(int argc, char ** argv){ int n_drafted = 0; int n_accept = 0; + int64_t t_draft_us = 0; + int n_past = inp.size(); bool has_eos = false; @@ -160,7 +164,7 @@ int main(int argc, char ** argv){ // generate n_pred tokens through prompt lookup auto prompt_lookup = [&]() -> void { - int inp_size = inp.size(); + const int inp_size = inp.size(); for (int ngram_size = ngram_max ; ngram_size > ngram_min; --ngram_size){ const llama_token * ngram = &inp[inp_size - ngram_size]; @@ -191,8 +195,12 @@ int main(int argc, char ** argv){ return; }; + const int64_t t_start_draft_us = ggml_time_us(); + prompt_lookup(); + t_draft_us += ggml_time_us() - t_start_draft_us; + llama_decode(ctx, batch_tgt); ++n_past; @@ -210,6 +218,8 @@ int main(int argc, char ** argv){ LOG_TEE("n_draft = %d\n", n_draft); LOG_TEE("n_predict = %d\n", n_predict); LOG_TEE("n_drafted = %d\n", n_drafted); + LOG_TEE("t_draft = %.2f ms, %.2f us per token, %.2f tokens per second\n", + t_draft_us*1e-3, 1.0f*t_draft_us/n_drafted, n_drafted/(1e-6*t_draft_us)); LOG_TEE("n_accept = %d\n", n_accept); LOG_TEE("accept = %.3f%%\n", 100.0f * n_accept / n_drafted); From a07d0fee1f05c5c1dc49948ae1a3293db017275f Mon Sep 17 00:00:00 2001 From: snadampal <87143774+snadampal@users.noreply.github.com> Date: Sun, 11 Feb 2024 07:22:33 -0600 Subject: [PATCH 79/94] ggml : add mmla kernels for quantized GEMM 
(#4966) * ggml: aarch64: implement smmla kernel for q8_0_q8_0 quantized gemm armv8.2-a and above support MMLA instructions that have higher throughput than DOT. This commit adds an mmla kernel for q8_0_q8_0 gemm. The feature is enabled if the platform supports "__ARM_FEATURE_MATMUL_INT8". On AWS Graviton3 processors this kernel resulted in up to a 1.5x improvement in prompt evaluation throughput compared to the default sdot kernel. * ggml: aarch64: implement smmla kernel for q4_0_q8_0 quantized gemm armv8.2-a and above support MMLA instructions that have higher throughput than DOT. This commit adds an mmla kernel for q4_0_q8_0 gemm. The feature is enabled if the platform supports "__ARM_FEATURE_MATMUL_INT8". On AWS Graviton3 processors this kernel resulted in up to a 1.5x improvement in prompt evaluation throughput compared to the default sdot kernel. * ggml: aarch64: implement smmla kernel for q4_1_q8_1 quantized gemm armv8.2-a and above support MMLA instructions that have higher throughput than DOT. This commit adds an mmla kernel for q4_1_q8_1 gemm. The feature is enabled if the platform supports "__ARM_FEATURE_MATMUL_INT8". On AWS Graviton3 processors this kernel resulted in up to a 1.5x improvement in prompt evaluation throughput compared to the default sdot kernel. 
* ggml: update unit tests for the new vec_dot interface * llama.cpp: add MATMUL_INT8 capability to system_info --- common/common.cpp | 1 + ggml-quants.c | 320 +++++++++++++++++++++++++++++++++-- ggml-quants.h | 26 +-- ggml.c | 164 ++++++++++++------ ggml.h | 5 +- llama.cpp | 1 + pocs/vdot/q8dot.cpp | 4 +- pocs/vdot/vdot.cpp | 4 +- tests/test-quantize-fns.cpp | 2 +- tests/test-quantize-perf.cpp | 2 +- 10 files changed, 441 insertions(+), 88 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index e0082a823..9a489a553 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1550,6 +1550,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l fprintf(stream, "cpu_has_blas: %s\n", ggml_cpu_has_blas() ? "true" : "false"); fprintf(stream, "cpu_has_sse3: %s\n", ggml_cpu_has_sse3() ? "true" : "false"); fprintf(stream, "cpu_has_vsx: %s\n", ggml_cpu_has_vsx() ? "true" : "false"); + fprintf(stream, "cpu_has_matmul_int8: %s\n", ggml_cpu_has_matmul_int8() ? "true" : "false"); #ifdef NDEBUG fprintf(stream, "debug: false\n"); diff --git a/ggml-quants.c b/ggml-quants.c index 1031e3761..6c122dd2a 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -49,6 +49,8 @@ #define MIN(a, b) ((a) < (b) ? (a) : (b)) #define MAX(a, b) ((a) > (b) ? 
(a) : (b)) +#define UNUSED GGML_UNUSED + #define MM256_SET_M128I(a, b) _mm256_insertf128_si256(_mm256_castsi128_si256(b), (a), 1) #if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) @@ -3677,15 +3679,88 @@ static inline __m128i get_scale_shuffle(int i) { } #endif -void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, const void * restrict vx, const void * restrict vy) { +void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { const int qk = QK8_0; const int nb = n / qk; assert(n % qk == 0); +#if defined(__ARM_FEATURE_MATMUL_INT8) + assert((nrc == 2) || (nrc == 1)); +#else + assert(nrc == 1); +#endif const block_q4_0 * restrict x = vx; const block_q8_0 * restrict y = vy; +#if defined(__ARM_FEATURE_MATMUL_INT8) + if (nrc == 2) { + const block_q4_0 * restrict vx0 = vx; + const block_q4_0 * restrict vx1 = vx + bx; + + const block_q8_0 * restrict vy0 = vy; + const block_q8_0 * restrict vy1 = vy + by; + + float32x4_t sumv0 = vdupq_n_f32(0.0f); + + for (int i = 0; i < nb; i++) { + const block_q4_0 * restrict b_x0 = &vx0[i]; + const block_q4_0 * restrict b_x1 = &vx1[i]; + const block_q8_0 * restrict b_y0 = &vy0[i]; + const block_q8_0 * restrict b_y1 = &vy1[i]; + + const uint8x16_t m4b = vdupq_n_u8(0x0F); + const int8x16_t s8b = vdupq_n_s8(0x8); + + const uint8x16_t v0_0 = vld1q_u8(b_x0->qs); + const uint8x16_t v0_1 = vld1q_u8(b_x1->qs); + + // 4-bit -> 8-bit + const int8x16_t v0_0l = vreinterpretq_s8_u8(vandq_u8 (v0_0, m4b)); + const int8x16_t v0_0h = vreinterpretq_s8_u8(vshrq_n_u8(v0_0, 4)); + const int8x16_t v0_1l = vreinterpretq_s8_u8(vandq_u8 (v0_1, m4b)); + const int8x16_t v0_1h = vreinterpretq_s8_u8(vshrq_n_u8(v0_1, 4)); + + // sub 8 + const int8x16_t x0_l = vsubq_s8(v0_0l, s8b); + const int8x16_t x0_h = vsubq_s8(v0_0h, s8b); + const int8x16_t x1_l = vsubq_s8(v0_1l, s8b); + const int8x16_t x1_h = vsubq_s8(v0_1h, s8b); + + // load y + 
const int8x16_t y0_l = vld1q_s8(b_y0->qs); + const int8x16_t y0_h = vld1q_s8(b_y0->qs + 16); + const int8x16_t y1_l = vld1q_s8(b_y1->qs); + const int8x16_t y1_h = vld1q_s8(b_y1->qs + 16); + + float32x4_t scale = {GGML_FP16_TO_FP32(b_x0->d)*GGML_FP16_TO_FP32(b_y0->d), + GGML_FP16_TO_FP32(b_x0->d)*GGML_FP16_TO_FP32(b_y1->d), + GGML_FP16_TO_FP32(b_x1->d)*GGML_FP16_TO_FP32(b_y0->d), + GGML_FP16_TO_FP32(b_x1->d)*GGML_FP16_TO_FP32(b_y1->d)}; + + int8x16_t l0 = vreinterpretq_s8_s64(vzip1q_s64(vreinterpretq_s64_s8(x0_l), vreinterpretq_s64_s8(x1_l))); + int8x16_t l1 = vreinterpretq_s8_s64(vzip2q_s64(vreinterpretq_s64_s8(x0_l), vreinterpretq_s64_s8(x1_l))); + + int8x16_t l2 = vreinterpretq_s8_s64(vzip1q_s64(vreinterpretq_s64_s8(x0_h), vreinterpretq_s64_s8(x1_h))); + int8x16_t l3 = vreinterpretq_s8_s64(vzip2q_s64(vreinterpretq_s64_s8(x0_h), vreinterpretq_s64_s8(x1_h))); + + int8x16_t r0 = vreinterpretq_s8_s64(vzip1q_s64(vreinterpretq_s64_s8(y0_l), vreinterpretq_s64_s8(y1_l))); + int8x16_t r1 = vreinterpretq_s8_s64(vzip2q_s64(vreinterpretq_s64_s8(y0_l), vreinterpretq_s64_s8(y1_l))); + + int8x16_t r2 = vreinterpretq_s8_s64(vzip1q_s64(vreinterpretq_s64_s8(y0_h), vreinterpretq_s64_s8(y1_h))); + int8x16_t r3 = vreinterpretq_s8_s64(vzip2q_s64(vreinterpretq_s64_s8(y0_h), vreinterpretq_s64_s8(y1_h))); + + sumv0 = vmlaq_f32(sumv0,(vcvtq_f32_s32(vmmlaq_s32((vmmlaq_s32((vmmlaq_s32((vmmlaq_s32(vdupq_n_s32(0), l0, r0)), + l1, r1)), l2, r2)), l3, r3))), scale); + } + float32x4_t sumv1 = vextq_f32(sumv0, sumv0, 2); + float32x4_t sumv2 = vzip1q_f32(sumv0, sumv1); + + vst1_f32(s, vget_low_f32(sumv2)); + vst1_f32(s + bs, vget_high_f32(sumv2)); + return; + } +#endif #if defined(__ARM_NEON) float32x4_t sumv0 = vdupq_n_f32(0.0f); float32x4_t sumv1 = vdupq_n_f32(0.0f); @@ -3967,15 +4042,89 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, const void * restrict vx, #endif } -void ggml_vec_dot_q4_1_q8_1(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { 
+void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { const int qk = QK8_1; const int nb = n / qk; assert(n % qk == 0); +#if defined(__ARM_FEATURE_MATMUL_INT8) + assert((nrc == 2) || (nrc == 1)); +#else + assert(nrc == 1); +#endif const block_q4_1 * restrict x = vx; const block_q8_1 * restrict y = vy; +#if defined(__ARM_FEATURE_MATMUL_INT8) + if (nrc == 2) { + const block_q4_1 * restrict vx0 = vx; + const block_q4_1 * restrict vx1 = vx + bx; + const block_q8_1 * restrict vy0 = vy; + const block_q8_1 * restrict vy1 = vy + by; + + float32x4_t sumv0 = vdupq_n_f32(0.0f); + float32x4_t summs0 = vdupq_n_f32(0.0f); + + for (int i = 0; i < nb; i++) { + const block_q4_1 * restrict b_x0 = &vx0[i]; + const block_q4_1 * restrict b_x1 = &vx1[i]; + const block_q8_1 * restrict b_y0 = &vy0[i]; + const block_q8_1 * restrict b_y1 = &vy1[i]; + + float32x4_t summs_t = {GGML_FP16_TO_FP32(b_x0->m) * b_y0->s, + GGML_FP16_TO_FP32(b_x1->m) * b_y0->s, + GGML_FP16_TO_FP32(b_x0->m) * b_y1->s, + GGML_FP16_TO_FP32(b_x1->m) * b_y1->s}; + summs0 += summs_t; + + const uint8x16_t m4b = vdupq_n_u8(0x0F); + + const uint8x16_t v0_0 = vld1q_u8(b_x0->qs); + const uint8x16_t v0_1 = vld1q_u8(b_x1->qs); + + // 4-bit -> 8-bit + const int8x16_t x0_l = vreinterpretq_s8_u8(vandq_u8 (v0_0, m4b)); + const int8x16_t x0_h = vreinterpretq_s8_u8(vshrq_n_u8(v0_0, 4)); + const int8x16_t x1_l = vreinterpretq_s8_u8(vandq_u8 (v0_1, m4b)); + const int8x16_t x1_h = vreinterpretq_s8_u8(vshrq_n_u8(v0_1, 4)); + + // load y + const int8x16_t y0_l = vld1q_s8(b_y0->qs); + const int8x16_t y0_h = vld1q_s8(b_y0->qs + 16); + const int8x16_t y1_l = vld1q_s8(b_y1->qs); + const int8x16_t y1_h = vld1q_s8(b_y1->qs + 16); + + // mmla into int32x4_t + float32x4_t scale = {GGML_FP16_TO_FP32(b_x0->d)*GGML_FP16_TO_FP32(b_y0->d), + GGML_FP16_TO_FP32(b_x0->d)*GGML_FP16_TO_FP32(b_y1->d), + GGML_FP16_TO_FP32(b_x1->d)*GGML_FP16_TO_FP32(b_y0->d), + 
GGML_FP16_TO_FP32(b_x1->d)*GGML_FP16_TO_FP32(b_y1->d)}; + + int8x16_t l0 = vreinterpretq_s8_s64(vzip1q_s64(vreinterpretq_s64_s8(x0_l), vreinterpretq_s64_s8(x1_l))); + int8x16_t l1 = vreinterpretq_s8_s64(vzip2q_s64(vreinterpretq_s64_s8(x0_l), vreinterpretq_s64_s8(x1_l))); + + int8x16_t l2 = vreinterpretq_s8_s64(vzip1q_s64(vreinterpretq_s64_s8(x0_h), vreinterpretq_s64_s8(x1_h))); + int8x16_t l3 = vreinterpretq_s8_s64(vzip2q_s64(vreinterpretq_s64_s8(x0_h), vreinterpretq_s64_s8(x1_h))); + + int8x16_t r0 = vreinterpretq_s8_s64(vzip1q_s64(vreinterpretq_s64_s8(y0_l), vreinterpretq_s64_s8(y1_l))); + int8x16_t r1 = vreinterpretq_s8_s64(vzip2q_s64(vreinterpretq_s64_s8(y0_l), vreinterpretq_s64_s8(y1_l))); + + int8x16_t r2 = vreinterpretq_s8_s64(vzip1q_s64(vreinterpretq_s64_s8(y0_h), vreinterpretq_s64_s8(y1_h))); + int8x16_t r3 = vreinterpretq_s8_s64(vzip2q_s64(vreinterpretq_s64_s8(y0_h), vreinterpretq_s64_s8(y1_h))); + sumv0 = vmlaq_f32(sumv0,(vcvtq_f32_s32(vmmlaq_s32((vmmlaq_s32((vmmlaq_s32((vmmlaq_s32(vdupq_n_s32(0), l0, r0)), + l1, r1)), l2, r2)), l3, r3))), scale); + } + + float32x4_t sumv1 = vextq_f32(sumv0, sumv0, 2); + float32x4_t sumv2 = vzip1q_f32(sumv0, sumv1); + sumv2 = sumv2 + summs0; + + vst1_f32(s, vget_low_f32(sumv2)); + vst1_f32(s + bs, vget_high_f32(sumv2)); + return; + } +#endif // TODO: add WASM SIMD #if defined(__ARM_NEON) float32x4_t sumv0 = vdupq_n_f32(0.0f); @@ -4107,12 +4256,17 @@ void ggml_vec_dot_q4_1_q8_1(const int n, float * restrict s, const void * restri #endif } -void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { +void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { const int qk = QK8_0; const int nb = n / qk; assert(n % qk == 0); assert(qk == QK5_0); + assert(nrc == 1); + UNUSED(nrc); + UNUSED(bx); + UNUSED(by); + UNUSED(bs); const block_q5_0 * restrict x = vx; const block_q8_0 * 
restrict y = vy; @@ -4393,12 +4547,17 @@ void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void * restri #endif } -void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { +void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { const int qk = QK8_1; const int nb = n / qk; assert(n % qk == 0); assert(qk == QK5_1); + assert(nrc == 1); + UNUSED(nrc); + UNUSED(bx); + UNUSED(by); + UNUSED(bs); const block_q5_1 * restrict x = vx; const block_q8_1 * restrict y = vy; @@ -4692,15 +4851,75 @@ void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void * restri #endif } -void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { +void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { const int qk = QK8_0; const int nb = n / qk; assert(n % qk == 0); +#if defined(__ARM_FEATURE_MATMUL_INT8) + assert((nrc == 2) || (nrc == 1)); +#else + assert(nrc == 1); +#endif const block_q8_0 * restrict x = vx; const block_q8_0 * restrict y = vy; +#if defined(__ARM_FEATURE_MATMUL_INT8) + if (nrc == 2) { + const block_q8_0 * restrict vx0 = vx; + const block_q8_0 * restrict vx1 = vx + bx; + const block_q8_0 * restrict vy0 = vy; + const block_q8_0 * restrict vy1 = vy + by; + + float32x4_t sumv0 = vdupq_n_f32(0.0f); + + for (int i = 0; i < nb; i++) { + const block_q8_0 * restrict b_x0 = &vx0[i]; + const block_q8_0 * restrict b_y0 = &vy0[i]; + + const block_q8_0 * restrict b_x1 = &vx1[i]; + const block_q8_0 * restrict b_y1 = &vy1[i]; + + const int8x16_t x0_l = vld1q_s8(b_x0->qs); + const int8x16_t x0_h = vld1q_s8(b_x0->qs + 16); + const int8x16_t x1_l = vld1q_s8(b_x1->qs); + const int8x16_t x1_h = vld1q_s8(b_x1->qs + 16); + + // load y + const int8x16_t y0_l = 
vld1q_s8(b_y0->qs); + const int8x16_t y0_h = vld1q_s8(b_y0->qs + 16); + const int8x16_t y1_l = vld1q_s8(b_y1->qs); + const int8x16_t y1_h = vld1q_s8(b_y1->qs + 16); + + float32x4_t scale = {GGML_FP16_TO_FP32(b_x0->d)*GGML_FP16_TO_FP32(b_y0->d), + GGML_FP16_TO_FP32(b_x0->d)*GGML_FP16_TO_FP32(b_y1->d), + GGML_FP16_TO_FP32(b_x1->d)*GGML_FP16_TO_FP32(b_y0->d), + GGML_FP16_TO_FP32(b_x1->d)*GGML_FP16_TO_FP32(b_y1->d)}; + + int8x16_t l0 = vreinterpretq_s8_s64(vzip1q_s64(vreinterpretq_s64_s8(x0_l), vreinterpretq_s64_s8(x1_l))); + int8x16_t l1 = vreinterpretq_s8_s64(vzip2q_s64(vreinterpretq_s64_s8(x0_l), vreinterpretq_s64_s8(x1_l))); + + int8x16_t l2 = vreinterpretq_s8_s64(vzip1q_s64(vreinterpretq_s64_s8(x0_h), vreinterpretq_s64_s8(x1_h))); + int8x16_t l3 = vreinterpretq_s8_s64(vzip2q_s64(vreinterpretq_s64_s8(x0_h), vreinterpretq_s64_s8(x1_h))); + + int8x16_t r0 = vreinterpretq_s8_s64(vzip1q_s64(vreinterpretq_s64_s8(y0_l), vreinterpretq_s64_s8(y1_l))); + int8x16_t r1 = vreinterpretq_s8_s64(vzip2q_s64(vreinterpretq_s64_s8(y0_l), vreinterpretq_s64_s8(y1_l))); + + int8x16_t r2 = vreinterpretq_s8_s64(vzip1q_s64(vreinterpretq_s64_s8(y0_h), vreinterpretq_s64_s8(y1_h))); + int8x16_t r3 = vreinterpretq_s8_s64(vzip2q_s64(vreinterpretq_s64_s8(y0_h), vreinterpretq_s64_s8(y1_h))); + + sumv0 = vmlaq_f32(sumv0,(vcvtq_f32_s32(vmmlaq_s32((vmmlaq_s32((vmmlaq_s32((vmmlaq_s32(vdupq_n_s32(0), l0, r0)), + l1, r1)), l2, r2)), l3, r3))), scale); + } + float32x4_t sumv1 = vextq_f32(sumv0, sumv0, 2); + float32x4_t sumv2 = vzip1q_f32(sumv0, sumv1); + + vst1_f32(s, vget_low_f32(sumv2)); + vst1_f32(s + bs, vget_high_f32(sumv2)); + return; + } +#endif #if defined(__ARM_NEON) float32x4_t sumv0 = vdupq_n_f32(0.0f); float32x4_t sumv1 = vdupq_n_f32(0.0f); @@ -4795,7 +5014,12 @@ void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void * restri } #if QK_K == 256 -void ggml_vec_dot_q2_K_q8_K(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { +void 
ggml_vec_dot_q2_K_q8_K(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { + assert(nrc == 1); + UNUSED(nrc); + UNUSED(bx); + UNUSED(by); + UNUSED(bs); const block_q2_K * restrict x = vx; const block_q8_K * restrict y = vy; @@ -5171,7 +5395,12 @@ void ggml_vec_dot_q2_K_q8_K(const int n, float * restrict s, const void * restri #else -void ggml_vec_dot_q2_K_q8_K(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { +void ggml_vec_dot_q2_K_q8_K(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { + assert(nrc == 1); + UNUSED(nrc); + UNUSED(bx); + UNUSED(by); + UNUSED(bs); const block_q2_K * restrict x = vx; const block_q8_K * restrict y = vy; @@ -5429,8 +5658,13 @@ void ggml_vec_dot_q2_K_q8_K(const int n, float * restrict s, const void * restri #endif #if QK_K == 256 -void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { +void ggml_vec_dot_q3_K_q8_K(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { assert(n % QK_K == 0); + assert(nrc == 1); + UNUSED(nrc); + UNUSED(bx); + UNUSED(by); + UNUSED(bs); const uint32_t kmask1 = 0x03030303; const uint32_t kmask2 = 0x0f0f0f0f; @@ -5949,8 +6183,13 @@ void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restri #else -void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { +void ggml_vec_dot_q3_K_q8_K(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { assert(n % QK_K == 0); + assert(nrc == 1); + UNUSED(nrc); + UNUSED(bx); + UNUSED(by); + UNUSED(bs); const block_q3_K * restrict x = vx; const block_q8_K * restrict y = vy; @@ -6292,8 +6531,13 @@ void ggml_vec_dot_q3_K_q8_K(const int 
n, float * restrict s, const void * restri #endif #if QK_K == 256 -void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { +void ggml_vec_dot_q4_K_q8_K(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { assert(n % QK_K == 0); + assert(nrc == 1); + UNUSED(nrc); + UNUSED(bx); + UNUSED(by); + UNUSED(bs); const block_q4_K * restrict x = vx; const block_q8_K * restrict y = vy; @@ -6648,8 +6892,13 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri #endif } #else -void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { +void ggml_vec_dot_q4_K_q8_K(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { assert(n % QK_K == 0); + assert(nrc == 1); + UNUSED(nrc); + UNUSED(bx); + UNUSED(by); + UNUSED(bs); const block_q4_K * restrict x = vx; const block_q8_K * restrict y = vy; @@ -6891,8 +7140,13 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri #endif #if QK_K == 256 -void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { +void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { assert(n % QK_K == 0); + assert(nrc == 1); + UNUSED(nrc); + UNUSED(bx); + UNUSED(by); + UNUSED(bs); const block_q5_K * restrict x = vx; const block_q8_K * restrict y = vy; @@ -7311,8 +7565,13 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri #else -void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { +void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, 
int nrc) { assert(n % QK_K == 0); + assert(nrc == 1); + UNUSED(nrc); + UNUSED(bx); + UNUSED(by); + UNUSED(bs); const block_q5_K * restrict x = vx; const block_q8_K * restrict y = vy; @@ -7577,8 +7836,13 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri #if QK_K == 256 -void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { +void ggml_vec_dot_q6_K_q8_K(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { assert(n % QK_K == 0); + assert(nrc == 1); + UNUSED(nrc); + UNUSED(bx); + UNUSED(by); + UNUSED(bs); const block_q6_K * restrict x = vx; const block_q8_K * restrict y = vy; @@ -8009,8 +8273,13 @@ void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restri #else -void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { +void ggml_vec_dot_q6_K_q8_K(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { assert(n % QK_K == 0); + assert(nrc == 1); + UNUSED(nrc); + UNUSED(bx); + UNUSED(by); + UNUSED(bs); const block_q6_K * restrict x = vx; const block_q8_K * restrict y = vy; @@ -8339,8 +8608,13 @@ static const int8_t keven_signs_q2xs[1024] = { 1, 1, -1, -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, 1, 1, -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, }; -void ggml_vec_dot_iq2_xxs_q8_K(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { +void ggml_vec_dot_iq2_xxs_q8_K(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { assert(n % QK_K == 0); + assert(nrc == 1); + UNUSED(nrc); + UNUSED(bx); + UNUSED(by); + UNUSED(bs); const block_iq2_xxs * restrict x = vx; const block_q8_K * restrict y = vy; @@ -8462,8 +8736,13 @@ void ggml_vec_dot_iq2_xxs_q8_K(const 
int n, float * restrict s, const void * res #endif } -void ggml_vec_dot_iq2_xs_q8_K(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { +void ggml_vec_dot_iq2_xs_q8_K(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { assert(n % QK_K == 0); + assert(nrc == 1); + UNUSED(nrc); + UNUSED(bx); + UNUSED(by); + UNUSED(bs); const block_iq2_xs * restrict x = vx; const block_q8_K * restrict y = vy; @@ -8682,8 +8961,13 @@ void ggml_vec_dot_iq2_xs_q8_K(const int n, float * restrict s, const void * rest } // TODO -void ggml_vec_dot_iq3_xxs_q8_K(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { +void ggml_vec_dot_iq3_xxs_q8_K(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { assert(n % QK_K == 0); + assert(nrc == 1); + UNUSED(nrc); + UNUSED(bx); + UNUSED(by); + UNUSED(bs); const block_iq3_xxs * restrict x = vx; const block_q8_K * restrict y = vy; diff --git a/ggml-quants.h b/ggml-quants.h index bfdf3c997..68f09b1e1 100644 --- a/ggml-quants.h +++ b/ggml-quants.h @@ -245,20 +245,20 @@ void dequantize_row_iq2_xs (const block_iq2_xs * GGML_RESTRICT x, float * GGML_ void dequantize_row_iq3_xxs(const block_iq3_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int k); // Dot product -void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy); -void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy); -void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy); -void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy); -void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * 
GGML_RESTRICT vy); +void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); -void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy); -void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy); -void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy); -void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy); -void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy); -void ggml_vec_dot_iq2_xxs_q8_K(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy); -void ggml_vec_dot_iq2_xs_q8_K (int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy); -void ggml_vec_dot_iq3_xxs_q8_K(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy); +void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, 
size_t by, int nrc); +void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_iq2_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_iq2_xs_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_iq3_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); // // Quantization utilizing an importance matrix (a.k.a. 
"Activation aWare Quantization") diff --git a/ggml.c b/ggml.c index 86cd65862..e45b78d7e 100644 --- a/ggml.c +++ b/ggml.c @@ -428,8 +428,8 @@ int64_t ggml_cycles_per_ms(void) { static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float); -static void ggml_vec_dot_f32(const int n, float * restrict s, const float * restrict x, const float * restrict y); -static void ggml_vec_dot_f16(const int n, float * restrict s, ggml_fp16_t * restrict x, ggml_fp16_t * restrict y); +static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float * restrict x, size_t bx, const float * restrict y, size_t by, int nrc); +static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc); static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { [GGML_TYPE_I8] = { @@ -457,6 +457,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { .is_quantized = false, .vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f32, .vec_dot_type = GGML_TYPE_F32, + .nrows = 1, }, [GGML_TYPE_F16] = { .type_name = "f16", @@ -468,6 +469,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { .from_float_reference = (ggml_from_float_t) ggml_fp32_to_fp16_row, .vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f16, .vec_dot_type = GGML_TYPE_F16, + .nrows = 1, }, [GGML_TYPE_Q4_0] = { .type_name = "q4_0", @@ -479,6 +481,11 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { .from_float_reference = (ggml_from_float_t) quantize_row_q4_0_reference, .vec_dot = ggml_vec_dot_q4_0_q8_0, .vec_dot_type = GGML_TYPE_Q8_0, +#if defined (__ARM_FEATURE_MATMUL_INT8) + .nrows = 2, +#else + .nrows = 1, +#endif }, [GGML_TYPE_Q4_1] = { .type_name = "q4_1", @@ -490,6 +497,11 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { .from_float_reference = (ggml_from_float_t) quantize_row_q4_1_reference, .vec_dot = ggml_vec_dot_q4_1_q8_1, .vec_dot_type = GGML_TYPE_Q8_1, +#if defined 
(__ARM_FEATURE_MATMUL_INT8) + .nrows = 2, +#else + .nrows = 1, +#endif }, [4] = { // GGML_TYPE_Q4_2 .type_name = "DEPRECATED", @@ -501,6 +513,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { .from_float_reference = NULL, .vec_dot = NULL, .vec_dot_type = GGML_TYPE_COUNT, + .nrows = 1, }, [5] = { // GGML_TYPE_Q4_3 .type_name = "DEPRECATED", @@ -512,6 +525,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { .from_float_reference = NULL, .vec_dot = NULL, .vec_dot_type = GGML_TYPE_COUNT, + .nrows = 1, }, [GGML_TYPE_Q5_0] = { .type_name = "q5_0", @@ -523,6 +537,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { .from_float_reference = (ggml_from_float_t) quantize_row_q5_0_reference, .vec_dot = ggml_vec_dot_q5_0_q8_0, .vec_dot_type = GGML_TYPE_Q8_0, + .nrows = 1, }, [GGML_TYPE_Q5_1] = { .type_name = "q5_1", @@ -534,6 +549,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { .from_float_reference = (ggml_from_float_t) quantize_row_q5_1_reference, .vec_dot = ggml_vec_dot_q5_1_q8_1, .vec_dot_type = GGML_TYPE_Q8_1, + .nrows = 1, }, [GGML_TYPE_Q8_0] = { .type_name = "q8_0", @@ -545,6 +561,11 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { .from_float_reference = (ggml_from_float_t) quantize_row_q8_0_reference, .vec_dot = ggml_vec_dot_q8_0_q8_0, .vec_dot_type = GGML_TYPE_Q8_0, +#if defined (__ARM_FEATURE_MATMUL_INT8) + .nrows = 2, +#else + .nrows = 1, +#endif }, [GGML_TYPE_Q8_1] = { .type_name = "q8_1", @@ -554,6 +575,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { .from_float = quantize_row_q8_1, .from_float_reference = (ggml_from_float_t) quantize_row_q8_1_reference, .vec_dot_type = GGML_TYPE_Q8_1, + .nrows = 1, }, [GGML_TYPE_Q2_K] = { .type_name = "q2_K", @@ -565,6 +587,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { .from_float_reference = (ggml_from_float_t) quantize_row_q2_K_reference, .vec_dot = ggml_vec_dot_q2_K_q8_K, .vec_dot_type = 
GGML_TYPE_Q8_K, + .nrows = 1, }, [GGML_TYPE_Q3_K] = { .type_name = "q3_K", @@ -576,6 +599,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { .from_float_reference = (ggml_from_float_t) quantize_row_q3_K_reference, .vec_dot = ggml_vec_dot_q3_K_q8_K, .vec_dot_type = GGML_TYPE_Q8_K, + .nrows = 1, }, [GGML_TYPE_Q4_K] = { .type_name = "q4_K", @@ -587,6 +611,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { .from_float_reference = (ggml_from_float_t) quantize_row_q4_K_reference, .vec_dot = ggml_vec_dot_q4_K_q8_K, .vec_dot_type = GGML_TYPE_Q8_K, + .nrows = 1, }, [GGML_TYPE_Q5_K] = { .type_name = "q5_K", @@ -598,6 +623,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { .from_float_reference = (ggml_from_float_t) quantize_row_q5_K_reference, .vec_dot = ggml_vec_dot_q5_K_q8_K, .vec_dot_type = GGML_TYPE_Q8_K, + .nrows = 1, }, [GGML_TYPE_Q6_K] = { .type_name = "q6_K", @@ -609,6 +635,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { .from_float_reference = (ggml_from_float_t) quantize_row_q6_K_reference, .vec_dot = ggml_vec_dot_q6_K_q8_K, .vec_dot_type = GGML_TYPE_Q8_K, + .nrows = 1, }, [GGML_TYPE_IQ2_XXS] = { .type_name = "iq2_xxs", @@ -620,6 +647,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { .from_float_reference = NULL, .vec_dot = ggml_vec_dot_iq2_xxs_q8_K, .vec_dot_type = GGML_TYPE_Q8_K, + .nrows = 1, }, [GGML_TYPE_IQ2_XS] = { .type_name = "iq2_xs", @@ -631,6 +659,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { .from_float_reference = NULL, .vec_dot = ggml_vec_dot_iq2_xs_q8_K, .vec_dot_type = GGML_TYPE_Q8_K, + .nrows = 1, }, [GGML_TYPE_IQ3_XXS] = { .type_name = "iq3_xxs", @@ -642,6 +671,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { .from_float_reference = (ggml_from_float_t)quantize_row_iq3_xxs_reference, .vec_dot = ggml_vec_dot_iq3_xxs_q8_K, .vec_dot_type = GGML_TYPE_Q8_K, + .nrows = 1, }, [GGML_TYPE_Q8_K] = { .type_name = 
"q8_K", @@ -1212,7 +1242,13 @@ inline static void ggml_vec_neg_f32 (const int n, float * y, const float * x) inline static void ggml_vec_mul_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]*y[i]; } inline static void ggml_vec_div_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]/y[i]; } -static void ggml_vec_dot_f32(const int n, float * restrict s, const float * restrict x, const float * restrict y) { +static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float * restrict x, size_t bx, const float * restrict y, size_t by, int nrc) { + assert(nrc == 1); + UNUSED(nrc); + UNUSED(bx); + UNUSED(by); + UNUSED(bs); + #ifdef GGML_SIMD float sumf = 0.0f; const int np = (n & ~(GGML_F32_STEP - 1)); @@ -1249,7 +1285,13 @@ static void ggml_vec_dot_f32(const int n, float * restrict s, const float * rest *s = sumf; } -static void ggml_vec_dot_f16(const int n, float * restrict s, ggml_fp16_t * restrict x, ggml_fp16_t * restrict y) { +static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc) { + assert(nrc == 1); + UNUSED(nrc); + UNUSED(bx); + UNUSED(by); + UNUSED(bs); + ggml_float sumf = 0.0; #if defined(GGML_SIMD) @@ -1455,7 +1497,7 @@ inline static void ggml_vec_scale_f32(const int n, float * y, const float v) { #endif } -inline static void ggml_vec_norm_f32 (const int n, float * s, const float * x) { ggml_vec_dot_f32(n, s, x, x); *s = sqrtf(*s); } +inline static void ggml_vec_norm_f32 (const int n, float * s, const float * x) { ggml_vec_dot_f32(n, s, 0, x, 0, x, 0, 1); *s = sqrtf(*s); } inline static void ggml_vec_sqr_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i]*x[i]; } inline static void ggml_vec_sqrt_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = sqrtf(x[i]); } inline static void 
ggml_vec_log_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = logf(x[i]); } @@ -9992,6 +10034,7 @@ static void ggml_compute_forward_mul_mat( ggml_vec_dot_t const vec_dot = type_traits[type].vec_dot; enum ggml_type const vec_dot_type = type_traits[type].vec_dot_type; ggml_from_float_t const from_float_to_vec_dot = type_traits[vec_dot_type].from_float; + int64_t const vec_dot_num_rows = type_traits[type].nrows; GGML_ASSERT(ne0 == ne01); GGML_ASSERT(ne1 == ne11); @@ -10159,12 +10202,23 @@ static void ggml_compute_forward_mul_mat( const int64_t blck_0 = 16; const int64_t blck_1 = 16; + // dot kernels can handle 1 row and col at a time, but mmla kernels can process 2 rows and cols + int64_t nrc = vec_dot_num_rows; + // TODO: currently the mmla kernels support only even numbered rows/cols. + // this check can be removed once they are extended to support odd numbered rows/cols too + if ((nr0 % 2 != 0) || (ne11 % 2 != 0)) { + nrc = 1; + } + + const size_t src1_col_stride = src1_cont || src1->type != vec_dot_type ? row_size : nb11; + // attempt to reduce false-sharing (does not seem to make a difference) - float tmp[16]; + // 16 * 2, accounting for mmla kernels + float tmp[32]; for (int64_t iir1 = ir110; iir1 < ir111; iir1 += blck_1) { for (int64_t iir0 = ir010; iir0 < ir011; iir0 += blck_0) { - for (int64_t ir1 = iir1; ir1 < iir1 + blck_1 && ir1 < ir111; ++ir1) { + for (int64_t ir1 = iir1; ir1 < iir1 + blck_1 && ir1 < ir111; ir1 += nrc) { const int64_t i13 = (ir1/(ne12*ne1)); const int64_t i12 = (ir1 - i13*ne12*ne1)/ne1; const int64_t i11 = (ir1 - i13*ne12*ne1 - i12*ne1); @@ -10187,17 +10241,19 @@ static void ggml_compute_forward_mul_mat( (src1_cont || src1->type != vec_dot_type ? 
(i11 + i12*ne11 + i13*ne12*ne11)*row_size : (i11*nb11 + i12*nb12 + i13*nb13)); - float * dst_col = (float *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb3)); //for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) { // vec_dot(ne00, &dst_col[ir0], src0_row + ir0*nb01, src1_col); //} - for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) { - vec_dot(ne00, &tmp[ir0 - iir0], src0_row + ir0*nb01, src1_col); + for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ir0 += nrc) { + vec_dot(ne00, &tmp[ir0 - iir0], (nrc>1 ? 16 : 0), src0_row + ir0*nb01, (nrc>1 ? nb01 : 0), src1_col, (nrc>1 ? src1_col_stride : 0), nrc); + } + + for (int cn = 0; cn < nrc; ++cn) { + memcpy(&dst_col[iir0 + cn*nb1/nb0], tmp + (cn*16), (MIN(iir0 + blck_0, ir011) - iir0)*sizeof(float)); } - memcpy(&dst_col[iir0], tmp, (MIN(iir0 + blck_0, ir011) - iir0)*sizeof(float)); } } } @@ -10386,7 +10442,7 @@ static void ggml_compute_forward_mul_mat_id( //} for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) { - vec_dot(ne00, &tmp[ir0 - iir0], src0_row + ir0*nb01, src1_col); + vec_dot(ne00, &tmp[ir0 - iir0], 0, src0_row + ir0*nb01, 0, src1_col, 0, 1); } memcpy(&dst_col[iir0], tmp, (MIN(iir0 + blck_0, ir011) - iir0)*sizeof(float)); } @@ -11568,7 +11624,7 @@ static void ggml_compute_forward_soft_max_back_f32( // linear runtime, no additional memory float dot_y_dy = 0; - ggml_vec_dot_f32 (nc, &dot_y_dy, y, dy); + ggml_vec_dot_f32 (nc, &dot_y_dy, 0, y, 0, dy, 0, 1); ggml_vec_cpy_f32 (nc, dx, dy); ggml_vec_acc1_f32(nc, dx, -dot_y_dy); ggml_vec_mul_f32 (nc, dx, dx, y); @@ -12369,9 +12425,9 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32( const int i1n = i10*ne11; for (int i00 = 0; i00 < ne00; i00++) { float v = 0; - ggml_vec_dot_f16(ne02, &v, - (ggml_fp16_t *) wdata_src + i1n, - (ggml_fp16_t *) wdata_kernel + i00*ne02); + ggml_vec_dot_f16(ne02, &v, 0, + (ggml_fp16_t *) wdata_src + i1n, 0, + (ggml_fp16_t *) wdata_kernel + i00*ne02, 0, 1); 
dst_data[i10*s0 + i00] += v; } } @@ -12466,9 +12522,9 @@ static void ggml_compute_forward_conv_transpose_1d_f32( const int i1n = i10*ne11; for (int i00 = 0; i00 < ne00; i00++) { float v = 0; - ggml_vec_dot_f32(ne02, &v, - wdata_src + i1n, - wdata_kernel + i00*ne02); + ggml_vec_dot_f32(ne02, &v, 0, + wdata_src + i1n, 0, + wdata_kernel + i00*ne02, 0, 1); dst_data[i10*s0 + i00] += v; } } @@ -12783,9 +12839,9 @@ static void ggml_compute_forward_conv_transpose_2d( for (int i01 = 0; i01 < ne01; i01++) { for (int i00 = 0; i00 < ne00; i00++) { float v = 0; - ggml_vec_dot_f16(ne03, &v, - wdata_src + i1n, - wdata_kernel + i01*ne00*ne03 + i00*ne03); + ggml_vec_dot_f16(ne03, &v, 0, + wdata_src + i1n, 0, + wdata_kernel + i01*ne00*ne03 + i00*ne03, 0, 1); dst_data[(i11*stride + i01)*ne0 + i10*stride + i00] += v; } } @@ -13214,9 +13270,9 @@ static void ggml_compute_forward_flash_attn_f32( const int i1 = ik1; ggml_vec_dot_f32(neq0, - S + i1, - (float *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)), - (float *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3))); + S + i1, 0, + (float *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)), 0, + (float *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3)), 0, 1); } // scale @@ -13299,9 +13355,9 @@ static void ggml_compute_forward_flash_attn_f32( const int iv3 = iq3; ggml_vec_dot_f32(masked_begin, - (float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)), - (float *) ((char *) v->data + ( ic*nbv1 + iv2*nbv2 + iv3*nbv3)), - S); + (float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)), 0, + (float *) ((char *) v->data + ( ic*nbv1 + iv2*nbv2 + iv3*nbv3)), 0, + S, 0, 1); } } } @@ -13404,9 +13460,9 @@ static void ggml_compute_forward_flash_attn_f16( const int i1 = ik1; ggml_vec_dot_f16(neq0, - S + i1, - (ggml_fp16_t *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)), - (ggml_fp16_t *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3))); + S + i1, 0, + (ggml_fp16_t *) ((char *) k->data + 
(ik1*nbk1 + ik2*nbk2 + ik3*nbk3)), 0, + (ggml_fp16_t *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3)), 0, 1); } } else { for (int64_t ic = 0; ic < nek1; ic += GGML_VEC_DOT_UNROLL) { @@ -13508,9 +13564,9 @@ static void ggml_compute_forward_flash_attn_f16( const int iv3 = iq3; ggml_vec_dot_f16(nev0, - (float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)), - (ggml_fp16_t *) ((char *) v->data + ( ic*nbv1 + iv2*nbv2 + iv3*nbv3)), - S16); + (float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)), 0, + (ggml_fp16_t *) ((char *) v->data + ( ic*nbv1 + iv2*nbv2 + iv3*nbv3)), 0, + S16, 0, 1); } } else { for (int64_t ic = 0; ic < nev1; ic += GGML_VEC_DOT_UNROLL) { @@ -13652,9 +13708,9 @@ static void ggml_compute_forward_flash_ff_f16( const int i1 = ib01; ggml_vec_dot_f16(nea0, - S + i1, - (ggml_fp16_t *) ((char *) b0->data + (ib01*nbb01 + ib02*nbb02 + ib03*nbb03)), - (ggml_fp16_t *) ((char *) a->data + ( ia1*nba1 + ia2*nba2 + ia3*nba3))); + S + i1, 0, + (ggml_fp16_t *) ((char *) b0->data + (ib01*nbb01 + ib02*nbb02 + ib03*nbb03)), 0, + (ggml_fp16_t *) ((char *) a->data + ( ia1*nba1 + ia2*nba2 + ia3*nba3)), 0, 1); } ggml_vec_add_f32(neb01, S, S, (float *) b1->data); @@ -13677,9 +13733,9 @@ static void ggml_compute_forward_flash_ff_f16( for (int64_t ic = 0; ic < nec01; ++ic) { ggml_vec_dot_f16(neb01, - (float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)), - (ggml_fp16_t *) ((char *) c0->data + ( ic*nbc01 + i2*nbc02 + i3*nbc03)), - S16); + (float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)), 0, + (ggml_fp16_t *) ((char *) c0->data + ( ic*nbc01 + i2*nbc02 + i3*nbc03)), 0, + S16, 0, 1); } ggml_vec_add_f32(nec01, @@ -13866,9 +13922,9 @@ static void ggml_compute_forward_flash_attn_back_f32( const int i1 = ik1; ggml_vec_dot_f32(neq0, - S + i1, - (float *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)), - (float *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3))); + S + i1, 0, + (float *) ((char *) k->data 
+ (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)), 0, + (float *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3)), 0, 1); } // scale @@ -14013,7 +14069,7 @@ static void ggml_compute_forward_flash_attn_back_f32( // S = SM * (S - dot(SM, S)) float dot_SM_gradSM = 0; - ggml_vec_dot_f32 (masked_begin, &dot_SM_gradSM, SM, S); + ggml_vec_dot_f32 (masked_begin, &dot_SM_gradSM, 0, SM, 0, S, 0, 1); ggml_vec_acc1_f32(M, S, -dot_SM_gradSM); ggml_vec_mul_f32 (masked_begin, S, S, SM); @@ -18382,7 +18438,7 @@ static enum ggml_opt_result linesearch_backtracking( } // compute the initial gradient in the search direction - ggml_vec_dot_f32(nx, &dginit, g, d); + ggml_vec_dot_f32(nx, &dginit, 0, g, 0, d, 0, 1); // make sure that d points to a descent direction if (0 < dginit) { @@ -18432,7 +18488,7 @@ static enum ggml_opt_result linesearch_backtracking( return count; } - ggml_vec_dot_f32(nx, &dg, g, d); + ggml_vec_dot_f32(nx, &dg, 0, g, 0, d, 0, 1); // check the Wolfe condition if (dg < params->lbfgs.wolfe * dginit) { @@ -18693,8 +18749,8 @@ static enum ggml_opt_result ggml_opt_lbfgs( // ys = y^t \cdot s -> 1 / \rho. // yy = y^t \cdot y. 
// - ggml_vec_dot_f32(nx, &ys, &lm_y[end[0]*nx], &lm_s[end[0]*nx]); - ggml_vec_dot_f32(nx, &yy, &lm_y[end[0]*nx], &lm_y[end[0]*nx]); + ggml_vec_dot_f32(nx, &ys, 0, &lm_y[end[0]*nx], 0, &lm_s[end[0]*nx], 0, 1); + ggml_vec_dot_f32(nx, &yy, 0, &lm_y[end[0]*nx], 0, &lm_y[end[0]*nx], 0, 1); lm_ys[end[0]] = ys; @@ -18713,7 +18769,7 @@ static enum ggml_opt_result ggml_opt_lbfgs( for (int i = 0; i < bound; ++i) { j[0] = (j[0] + m - 1) % m; // \alpha_{j} = \rho_{j} s^{t}_{j} \cdot q_{k+1} - ggml_vec_dot_f32(nx, &lm_alpha[j[0]], &lm_s[j[0]*nx], d); + ggml_vec_dot_f32(nx, &lm_alpha[j[0]], 0, &lm_s[j[0]*nx], 0, d, 0, 1); lm_alpha[j[0]] /= lm_ys[j[0]]; // q_{i} = q_{i+1} - \alpha_{i} y_{i} ggml_vec_mad_f32(nx, d, &lm_y[j[0]*nx], -lm_alpha[j[0]]); @@ -18723,7 +18779,7 @@ static enum ggml_opt_result ggml_opt_lbfgs( for (int i = 0; i < bound; ++i) { // \beta_{j} = \rho_{j} y^t_{j} \cdot \gamma_{i} - ggml_vec_dot_f32(nx, &beta, &lm_y[j[0]*nx], d); + ggml_vec_dot_f32(nx, &beta, 0, &lm_y[j[0]*nx], 0, d, 0, 1); beta /= lm_ys[j[0]]; // \gamma_{i+1} = \gamma_{i} + (\alpha_{j} - \beta_{j}) s_{j} ggml_vec_mad_f32(nx, d, &lm_s[j[0]*nx], lm_alpha[j[0]] - beta); @@ -20611,4 +20667,12 @@ int ggml_cpu_has_vsx(void) { #endif } +int ggml_cpu_has_matmul_int8(void) { +#if defined(__ARM_FEATURE_MATMUL_INT8) + return 1; +#else + return 0; +#endif +} + //////////////////////////////////////////////////////////////////////////////// diff --git a/ggml.h b/ggml.h index 1360cd8ee..9cfec5bac 100644 --- a/ggml.h +++ b/ggml.h @@ -2278,6 +2278,7 @@ extern "C" { GGML_API int ggml_cpu_has_ssse3 (void); GGML_API int ggml_cpu_has_sycl (void); GGML_API int ggml_cpu_has_vsx (void); + GGML_API int ggml_cpu_has_matmul_int8(void); // // Internal types and functions exposed for tests and benchmarks @@ -2291,7 +2292,8 @@ extern "C" { #endif typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int k); typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT 
y, int k); - typedef void (*ggml_vec_dot_t) (const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y); + typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx, + const void * GGML_RESTRICT y, size_t by, int nrc); typedef struct { const char * type_name; @@ -2303,6 +2305,7 @@ extern "C" { ggml_from_float_t from_float_reference; ggml_vec_dot_t vec_dot; enum ggml_type vec_dot_type; + int64_t nrows; // number of rows to process simultaneously; } ggml_type_traits_t; GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type); diff --git a/llama.cpp b/llama.cpp index 0566b087b..3f39a67fb 100644 --- a/llama.cpp +++ b/llama.cpp @@ -11869,6 +11869,7 @@ const char * llama_print_system_info(void) { s += "SSE3 = " + std::to_string(ggml_cpu_has_sse3()) + " | "; s += "SSSE3 = " + std::to_string(ggml_cpu_has_ssse3()) + " | "; s += "VSX = " + std::to_string(ggml_cpu_has_vsx()) + " | "; + s += "MATMUL_INT8 = " + std::to_string(ggml_cpu_has_matmul_int8()) + " | "; return s.c_str(); } diff --git a/pocs/vdot/q8dot.cpp b/pocs/vdot/q8dot.cpp index 111770d55..1a52ff5e9 100644 --- a/pocs/vdot/q8dot.cpp +++ b/pocs/vdot/q8dot.cpp @@ -156,8 +156,8 @@ int main(int argc, char** argv) { t1 = std::chrono::high_resolution_clock::now(); float fs; - if (type == 0) funcs.vec_dot(kVecSize * QK4_1, &fs, x40.data(), y.data()); - else funcs.vec_dot(kVecSize * QK4_1, &fs, x41.data(), y.data()); + if (type == 0) funcs.vec_dot(kVecSize * QK4_1, &fs, 0, x40.data(), 0, y.data(), 0, 1); + else funcs.vec_dot(kVecSize * QK4_1, &fs, 0, x41.data(), 0, y.data(), 0, 1); t2 = std::chrono::high_resolution_clock::now(); t = 1e-3*std::chrono::duration_cast(t2-t1).count(); if (iloop > 3) ggml.addResult(fs, t); diff --git a/pocs/vdot/vdot.cpp b/pocs/vdot/vdot.cpp index 73ffcd1ca..17e9e4482 100644 --- a/pocs/vdot/vdot.cpp +++ b/pocs/vdot/vdot.cpp @@ -284,8 +284,8 @@ int main(int argc, char** argv) { 
else { auto vdot = ggml_internal_get_type_traits(funcs.vec_dot_type); vdot.from_float(y1.data(), q8.data(), kVecSize); - if (useQ4_1) funcs.vec_dot(kVecSize, &result, q41.data(), q8.data()); - else funcs.vec_dot(kVecSize, &result, q40.data(), q8.data()); + if (useQ4_1) funcs.vec_dot(kVecSize, &result, 0, q41.data(), 0, q8.data(), 0, 1); + else funcs.vec_dot(kVecSize, &result, 0, q40.data(), 0, q8.data(), 0, 1); } sumq += result; t2 = std::chrono::high_resolution_clock::now(); diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp index 43df8022d..5e92d5742 100644 --- a/tests/test-quantize-fns.cpp +++ b/tests/test-quantize-fns.cpp @@ -87,7 +87,7 @@ static float dot_product_error( vdot.from_float(test_data2, tmp_q2.data(), test_size); float result = INFINITY; - qfns.vec_dot(test_size, &result, tmp_q1.data(), tmp_q2.data()); + qfns.vec_dot(test_size, &result, 0, tmp_q1.data(), 0, tmp_q2.data(), 0, 1); const float dot_ref = dot_product(test_data1, test_data2, test_size); diff --git a/tests/test-quantize-perf.cpp b/tests/test-quantize-perf.cpp index 8ec817344..48d9fae3d 100644 --- a/tests/test-quantize-perf.cpp +++ b/tests/test-quantize-perf.cpp @@ -346,7 +346,7 @@ int main(int argc, char * argv[]) { printf(" %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024)); auto quantize_fn = [&](void) -> float { float result; - qfns.vec_dot(size, &result, test_q1, test_q2); + qfns.vec_dot(size, &result, 0, test_q1, 0, test_q2, 0, 1); return result; }; size_t quantized_size = ggml_row_size(type, size); From 0f2411f154db46780d3aaa3a0664691b2170c83f Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 11 Feb 2024 15:33:01 +0200 Subject: [PATCH 80/94] ggml : fix compile warnings (unused vars) (#4966) --- ggml-quants.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ggml-quants.c b/ggml-quants.c index 6c122dd2a..b2a309bf8 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -3689,6 +3689,10 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, 
size_t bs, const void * r #else assert(nrc == 1); #endif + UNUSED(nrc); + UNUSED(bx); + UNUSED(by); + UNUSED(bs); const block_q4_0 * restrict x = vx; const block_q8_0 * restrict y = vy; @@ -4052,6 +4056,10 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r #else assert(nrc == 1); #endif + UNUSED(nrc); + UNUSED(bx); + UNUSED(by); + UNUSED(bs); const block_q4_1 * restrict x = vx; const block_q8_1 * restrict y = vy; @@ -4861,6 +4869,10 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * r #else assert(nrc == 1); #endif + UNUSED(nrc); + UNUSED(bx); + UNUSED(by); + UNUSED(bs); const block_q8_0 * restrict x = vx; const block_q8_0 * restrict y = vy; From 139b62a839825ef20084ed75ed624db7a5ad554a Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 11 Feb 2024 15:33:43 +0200 Subject: [PATCH 81/94] common : fix compile warning --- common/sampling.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/common/sampling.cpp b/common/sampling.cpp index 844ad7c53..82cbdecea 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -127,8 +127,6 @@ static void sampler_queue( const llama_sampling_params & params, llama_token_data_array & cur_p, size_t & min_keep) { - const int n_vocab = llama_n_vocab(llama_get_model(ctx_main)); - const float temp = params.temp; const float dynatemp_range = params.dynatemp_range; const float dynatemp_exponent = params.dynatemp_exponent; From 85910c5b30f6e268321be8df044f5528a6efac52 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 11 Feb 2024 15:35:50 +0200 Subject: [PATCH 82/94] main : ctrl+C print timing in non-interactive mode (#3873) --- examples/main/main.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 0ed4d79f9..e8ab8cbae 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -98,7 +98,7 @@ static void write_logfile( #if defined (__unix__) || (defined (__APPLE__) && 
defined (__MACH__)) || defined (_WIN32) static void sigint_handler(int signo) { if (signo == SIGINT) { - if (!is_interacting) { + if (!is_interacting && g_params->interactive) { is_interacting = true; } else { console::cleanup(); @@ -392,7 +392,8 @@ int main(int argc, char ** argv) { LOG_TEE("\n"); } - if (params.interactive) { + // ctrl+C handling + { #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) struct sigaction sigint_action; sigint_action.sa_handler = sigint_handler; @@ -405,7 +406,9 @@ int main(int argc, char ** argv) { }; SetConsoleCtrlHandler(reinterpret_cast(console_ctrl_handler), true); #endif + } + if (params.interactive) { LOG_TEE("%s: interactive mode on.\n", __func__); if (!params.antiprompt.empty()) { From 684780141a08200ec98eba3e982dbafd1d0b5000 Mon Sep 17 00:00:00 2001 From: Alexey Parfenov Date: Sun, 11 Feb 2024 13:38:14 +0000 Subject: [PATCH 83/94] server : allow to specify tokens as strings in logit_bias (#5003) * server: allow to specify tokens as strings in logit_bias * Apply suggestions from code review Co-authored-by: Georgi Gerganov --------- Co-authored-by: Georgi Gerganov --- examples/server/README.md | 2 +- examples/server/server.cpp | 32 +++++++++++++++++++++++++------- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/examples/server/README.md b/examples/server/README.md index 1db7cdf21..0f7373ae8 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -185,7 +185,7 @@ node index.js `ignore_eos`: Ignore end of stream token and continue generating (default: false). - `logit_bias`: Modify the likelihood of a token appearing in the generated text completion. For example, use `"logit_bias": [[15043,1.0]]` to increase the likelihood of the token 'Hello', or `"logit_bias": [[15043,-1.0]]` to decrease its likelihood. Setting the value to false, `"logit_bias": [[15043,false]]` ensures that the token `Hello` is never produced (default: []). 
+ `logit_bias`: Modify the likelihood of a token appearing in the generated text completion. For example, use `"logit_bias": [[15043,1.0]]` to increase the likelihood of the token 'Hello', or `"logit_bias": [[15043,-1.0]]` to decrease its likelihood. Setting the value to false, `"logit_bias": [[15043,false]]` ensures that the token `Hello` is never produced. The tokens can also be represented as strings, e.g. `[["Hello, World!",-0.5]]` will reduce the likelihood of all the individual tokens that represent the string `Hello, World!`, just like the `presence_penalty` does. (default: []). `n_probs`: If greater than 0, the response also contains the probabilities of top N tokens for each generated token (default: 0) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 4d212f1f0..1699eb76b 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -626,18 +626,36 @@ struct llama_server_context const int n_vocab = llama_n_vocab(model); for (const auto &el : *logit_bias) { - if (el.is_array() && el.size() == 2 && el[0].is_number_integer()) + if (el.is_array() && el.size() == 2) { - llama_token tok = el[0].get(); - if (tok >= 0 && tok < n_vocab) + float bias; + if (el[1].is_number()) { - if (el[1].is_number()) + bias = el[1].get(); + } + else if (el[1].is_boolean() && !el[1].get()) + { + bias = -INFINITY; + } + else + { + continue; + } + + if (el[0].is_number_integer()) + { + llama_token tok = el[0].get(); + if (tok >= 0 && tok < n_vocab) { - slot->sparams.logit_bias[tok] = el[1].get(); + slot->sparams.logit_bias[tok] = bias; } - else if (el[1].is_boolean() && !el[1].get()) + } + else if (el[0].is_string()) + { + auto toks = llama_tokenize(model, el[0].get(), false); + for (auto tok : toks) { - slot->sparams.logit_bias[tok] = -INFINITY; + slot->sparams.logit_bias[tok] = bias; } } } From a803333a4e6fc534c93afe90d741bc2388bdec87 Mon Sep 17 00:00:00 2001 From: Alexey Parfenov Date: Sun, 11 Feb 2024 13:43:31 +0000 Subject: [PATCH 84/94] 
common : use enums for sampler types (#5418) * common: use enums for sampler types * Apply suggestions from code review Co-authored-by: Georgi Gerganov * minor : spaces --------- Co-authored-by: Georgi Gerganov --- common/common.cpp | 117 +++++++++++++++++++++++++++++++------------- common/common.h | 7 ++- common/sampling.cpp | 31 +++++------- common/sampling.h | 20 +++++++- 4 files changed, 120 insertions(+), 55 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 9a489a553..f64da2cb6 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -340,13 +340,14 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { invalid_param = true; break; } - sparams.samplers_sequence = parse_samplers_input(argv[i]); + const auto sampler_names = string_split(argv[i], ';'); + sparams.samplers_sequence = sampler_types_from_names(sampler_names); } else if (arg == "--sampling-seq") { if (++i >= argc) { invalid_param = true; break; } - sparams.samplers_sequence = argv[i]; + sparams.samplers_sequence = sampler_types_from_chars(argv[i]); } else if (arg == "--top-p") { if (++i >= argc) { invalid_param = true; @@ -906,6 +907,14 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { const llama_sampling_params & sparams = params.sparams; + std::string sampler_type_chars; + std::string sampler_type_names; + for (const auto sampler_type : sparams.samplers_sequence) { + sampler_type_chars += static_cast(sampler_type); + sampler_type_names += sampler_type_to_name_string(sampler_type) + ";"; + } + sampler_type_names.pop_back(); + printf("\n"); printf("usage: %s [options]\n", argv[0]); printf("\n"); @@ -947,8 +956,8 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" -n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict); printf(" -c N, --ctx-size N size 
of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx); printf(" -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch); - printf(" --samplers samplers that will be used for generation in the order, separated by \';\', for example: \"top_k;tfs;typical;top_p;min_p;temp\"\n"); - printf(" --sampling-seq simplified sequence for samplers that will be used (default: %s)\n", sparams.samplers_sequence.c_str()); + printf(" --samplers samplers that will be used for generation in the order, separated by \';\' (default: %s)\n", sampler_type_names.c_str()); + printf(" --sampling-seq simplified sequence for samplers that will be used (default: %s)\n", sampler_type_chars.c_str()); printf(" --top-k N top-k sampling (default: %d, 0 = disabled)\n", sparams.top_k); printf(" --top-p N top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)sparams.top_p); printf(" --min-p N min-p sampling (default: %.1f, 0.0 = disabled)\n", (double)sparams.min_p); @@ -1097,45 +1106,85 @@ std::string gpt_random_prompt(std::mt19937 & rng) { } // -// String parsing +// String utils // -std::string parse_samplers_input(std::string input) { - std::string output = ""; +std::vector string_split(std::string input, char separator) { + std::vector parts; + size_t separator_pos = input.find(separator); + while (separator_pos != std::string::npos) { + std::string part = input.substr(0, separator_pos); + parts.emplace_back(part); + input = input.substr(separator_pos + 1); + separator_pos = input.find(separator); + } + parts.emplace_back(input); + return parts; +} + +std::vector sampler_types_from_names(const std::vector & names) { // since samplers names are written multiple ways // make it ready for both system names and input names - std::unordered_map samplers_symbols { - {"top_k", 'k'}, - {"top-k", 'k'}, - {"top_p", 'p'}, - {"top-p", 'p'}, - {"nucleus", 'p'}, - {"typical_p", 'y'}, - {"typical-p", 'y'}, - {"typical", 'y'}, - {"min_p", 'm'}, - {"min-p", 
'm'}, - {"tfs_z", 'f'}, - {"tfs-z", 'f'}, - {"tfs", 'f'}, - {"temp", 't'}, - {"temperature",'t'} + std::unordered_map sampler_name_map { + {"top_k", llama_sampler_type::TOP_K}, + {"top-k", llama_sampler_type::TOP_K}, + {"top_p", llama_sampler_type::TOP_P}, + {"top-p", llama_sampler_type::TOP_P}, + {"nucleus", llama_sampler_type::TOP_P}, + {"typical_p", llama_sampler_type::TYPICAL_P}, + {"typical-p", llama_sampler_type::TYPICAL_P}, + {"typical", llama_sampler_type::TYPICAL_P}, + {"min_p", llama_sampler_type::MIN_P}, + {"min-p", llama_sampler_type::MIN_P}, + {"tfs_z", llama_sampler_type::TFS_Z}, + {"tfs-z", llama_sampler_type::TFS_Z}, + {"tfs", llama_sampler_type::TFS_Z}, + {"temp", llama_sampler_type::TEMP}, + {"temperature", llama_sampler_type::TEMP} }; - // expected format example: "temp;top_k;tfs_z;typical_p;top_p;min_p" - size_t separator = input.find(';'); - while (separator != input.npos) { - std::string name = input.substr(0,separator); - input = input.substr(separator+1); - separator = input.find(';'); - if (samplers_symbols.find(name) != samplers_symbols.end()) { - output += samplers_symbols[name]; + std::vector sampler_types; + sampler_types.reserve(names.size()); + for (const auto& name : names) { + const auto sampler_item = sampler_name_map.find(name); + if (sampler_item != sampler_name_map.end()) { + sampler_types.push_back(sampler_item->second); } } - if (samplers_symbols.find(input) != samplers_symbols.end()) { - output += samplers_symbols[input]; + return sampler_types; +} + +std::vector sampler_types_from_chars(const std::string & names_string) { + std::unordered_map sampler_name_map { + {'k', llama_sampler_type::TOP_K}, + {'p', llama_sampler_type::TOP_P}, + {'y', llama_sampler_type::TYPICAL_P}, + {'m', llama_sampler_type::MIN_P}, + {'f', llama_sampler_type::TFS_Z}, + {'t', llama_sampler_type::TEMP} + }; + + std::vector sampler_types; + sampler_types.reserve(names_string.size()); + for (const auto & c : names_string) { + const auto sampler_item = 
sampler_name_map.find(c); + if (sampler_item != sampler_name_map.end()) { + sampler_types.push_back(sampler_item->second); + } + } + return sampler_types; +} + +std::string sampler_type_to_name_string(llama_sampler_type sampler_type) { + switch (sampler_type) { + case llama_sampler_type::TOP_K: return "top_k"; + case llama_sampler_type::TFS_Z: return "tfs_z"; + case llama_sampler_type::TYPICAL_P: return "typical_p"; + case llama_sampler_type::TOP_P: return "top_p"; + case llama_sampler_type::MIN_P: return "min_p"; + case llama_sampler_type::TEMP: return "temp"; + default : return ""; } - return output; } // diff --git a/common/common.h b/common/common.h index 62de25d6a..9bdd45cf9 100644 --- a/common/common.h +++ b/common/common.h @@ -162,10 +162,13 @@ std::string gpt_random_prompt(std::mt19937 & rng); void process_escapes(std::string& input); // -// String parsing +// String utils // -std::string parse_samplers_input(std::string input); +std::vector sampler_types_from_names(const std::vector & names); +std::vector sampler_types_from_chars(const std::string & names_string); +std::vector string_split(std::string input, char separator); +std::string sampler_type_to_name_string(llama_sampler_type sampler_type); // // Model utils diff --git a/common/sampling.cpp b/common/sampling.cpp index 82cbdecea..a001750da 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -103,15 +103,10 @@ std::string llama_sampling_print(const llama_sampling_params & params) { std::string llama_sampling_order_print(const llama_sampling_params & params) { std::string result = "CFG -> Penalties "; if (params.mirostat == 0) { - for (auto s : params.samplers_sequence) { - switch (s) { - case 'k': result += "-> top_k "; break; - case 'f': result += "-> tfs_z "; break; - case 'y': result += "-> typical_p "; break; - case 'p': result += "-> top_p "; break; - case 'm': result += "-> min_p "; break; - case 't': result += "-> temp "; break; - default : break; + for (auto sampler_type : 
params.samplers_sequence) { + const auto sampler_type_name = sampler_type_to_name_string(sampler_type); + if (!sampler_type_name.empty()) { + result += "-> " + sampler_type_name + " "; } } } else { @@ -135,16 +130,16 @@ static void sampler_queue( const float min_p = params.min_p; const float tfs_z = params.tfs_z; const float typical_p = params.typical_p; - const std::string & samplers_sequence = params.samplers_sequence; + const std::vector & samplers_sequence = params.samplers_sequence; - for (auto s : samplers_sequence) { - switch (s){ - case 'k': llama_sample_top_k (ctx_main, &cur_p, top_k, min_keep); break; - case 'f': llama_sample_tail_free(ctx_main, &cur_p, tfs_z, min_keep); break; - case 'y': llama_sample_typical (ctx_main, &cur_p, typical_p, min_keep); break; - case 'p': llama_sample_top_p (ctx_main, &cur_p, top_p, min_keep); break; - case 'm': llama_sample_min_p (ctx_main, &cur_p, min_p, min_keep); break; - case 't': + for (auto sampler_type : samplers_sequence) { + switch (sampler_type) { + case llama_sampler_type::TOP_K : llama_sample_top_k (ctx_main, &cur_p, top_k, min_keep); break; + case llama_sampler_type::TFS_Z : llama_sample_tail_free(ctx_main, &cur_p, tfs_z, min_keep); break; + case llama_sampler_type::TYPICAL_P: llama_sample_typical (ctx_main, &cur_p, typical_p, min_keep); break; + case llama_sampler_type::TOP_P : llama_sample_top_p (ctx_main, &cur_p, top_p, min_keep); break; + case llama_sampler_type::MIN_P : llama_sample_min_p (ctx_main, &cur_p, min_p, min_keep); break; + case llama_sampler_type::TEMP: if (dynatemp_range > 0) { float dynatemp_min = std::max(0.0f, temp - dynatemp_range); float dynatemp_max = std::max(0.0f, temp + dynatemp_range); diff --git a/common/sampling.h b/common/sampling.h index 88899c094..2bd6a75d2 100644 --- a/common/sampling.h +++ b/common/sampling.h @@ -8,6 +8,16 @@ #include #include +// sampler types +enum class llama_sampler_type : char { + TOP_K = 'k', + TOP_P = 'p', + MIN_P = 'm', + TFS_Z = 'f', + TYPICAL_P = 'y', 
+ TEMP = 't' +}; + // sampling parameters typedef struct llama_sampling_params { int32_t n_prev = 64; // number of previous tokens to remember @@ -28,7 +38,15 @@ typedef struct llama_sampling_params { float mirostat_tau = 5.00f; // target entropy float mirostat_eta = 0.10f; // learning rate bool penalize_nl = true; // consider newlines as a repeatable token - std::string samplers_sequence = "kfypmt"; // top_k, tail_free, typical_p, top_p, min_p, temp + + std::vector samplers_sequence = { + llama_sampler_type::TOP_K, + llama_sampler_type::TFS_Z, + llama_sampler_type::TYPICAL_P, + llama_sampler_type::TOP_P, + llama_sampler_type::MIN_P, + llama_sampler_type::TEMP + }; std::string grammar; // optional BNF-like grammar to constrain sampling From c88c74f967028ae3d5ebade40ae586d20a961abc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20L=C3=B3pez?= Date: Sun, 11 Feb 2024 15:12:00 +0100 Subject: [PATCH 85/94] vulkan: only use M-sized matmul on Apple GPUs (#5412) * vulkan: refactor guess_matmul_pipeline for vendor Refactor ggml_vk_guess_matmul_pipeline to simplify adding per-vendor conditionals. Signed-off-by: Sergio Lopez * vulkan: only use M-sized matmul on Apple GPUs L-sized and S-sized matmuls are broken on Apple GPUs, force using M-size with this vendor. 
Signed-off-by: Sergio Lopez --------- Signed-off-by: Sergio Lopez --- ggml-vulkan.cpp | 103 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 93 insertions(+), 10 deletions(-) diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp index 254f648a6..7834e635c 100644 --- a/ggml-vulkan.cpp +++ b/ggml-vulkan.cpp @@ -27,6 +27,7 @@ #define CEIL_DIV(M, N) (((M) + (N)-1) / (N)) #define VK_VENDOR_ID_AMD 0x1002 +#define VK_VENDOR_ID_APPLE 0x106b #define VK_VENDOR_ID_INTEL 0x8086 #define VK_VENDOR_ID_NVIDIA 0x10de @@ -2034,18 +2035,100 @@ static uint32_t ggml_vk_guess_matmul_pipeline_align(ggml_backend_vk_context * ct return ctx->pipeline_matmul_f32_aligned_l.align; } -static vk_pipeline* ggml_vk_guess_matmul_pipeline(ggml_backend_vk_context * ctx, bool bit16_x, bool bit16_y, int m, int n, bool aligned) { -#ifdef GGML_VULKAN_DEBUG - std::cerr << "ggml_vk_guess_matmul_pipeline(" << bit16_x << ", " << bit16_y << ", " << m << ", " << n << ", " << aligned << ")"; -#endif +static vk_pipeline* ggml_vk_guess_matmul_pipeline_amd(ggml_backend_vk_context * ctx, bool bit16_x, bool bit16_y, int m, int n, bool aligned) { if (bit16_x && bit16_y) { - if (ctx->device.lock()->vendor_id == VK_VENDOR_ID_INTEL || m <= 32 || n <= 32) { + if (m <= 32 || n <= 32) { #ifdef GGML_VULKAN_DEBUG std::cerr << " S" << std::endl; #endif return aligned ? &ctx->pipeline_matmul_f16_aligned_s : &ctx->pipeline_matmul_f16_s; } - if (ctx->device.lock()->subgroup_size == 64 || m <= 64 || n <= 64) { +#ifdef GGML_VULKAN_DEBUG + std::cerr << " M" << std::endl; +#endif + return aligned ? &ctx->pipeline_matmul_f16_aligned_m : &ctx->pipeline_matmul_f16_m; + } + if (bit16_x && !bit16_y) { + if (m <= 32 || n <= 32) { +#ifdef GGML_VULKAN_DEBUG + std::cerr << " S" << std::endl; +#endif + return aligned ? &ctx->pipeline_matmul_f16_f32_aligned_s : &ctx->pipeline_matmul_f16_f32_s; + } +#ifdef GGML_VULKAN_DEBUG + std::cerr << " M" << std::endl; +#endif + return aligned ? 
&ctx->pipeline_matmul_f16_f32_aligned_m : &ctx->pipeline_matmul_f16_f32_m; + } + if (!bit16_x && bit16_y) { + GGML_ASSERT(false); + } + + if (m <= 32 || n <= 32) { +#ifdef GGML_VULKAN_DEBUG + std::cerr << " S" << std::endl; +#endif + return aligned ? &ctx->pipeline_matmul_f32_aligned_s : &ctx->pipeline_matmul_f32_s; + } +#ifdef GGML_VULKAN_DEBUG + std::cerr << " M" << std::endl; +#endif + return aligned ? &ctx->pipeline_matmul_f32_aligned_m : &ctx->pipeline_matmul_f32_m; +} + +static vk_pipeline* ggml_vk_guess_matmul_pipeline_apple(ggml_backend_vk_context * ctx, bool bit16_x, bool bit16_y, bool aligned) { +#ifdef GGML_VULKAN_DEBUG + std::cerr << " M" << std::endl; +#endif + if (bit16_x && bit16_y) { + return aligned ? &ctx->pipeline_matmul_f16_aligned_m : &ctx->pipeline_matmul_f16_m; + } + if (bit16_x && !bit16_y) { + return aligned ? &ctx->pipeline_matmul_f16_f32_aligned_m : &ctx->pipeline_matmul_f16_f32_m; + } + if (!bit16_x && bit16_y) { + GGML_ASSERT(false); + } + return aligned ? &ctx->pipeline_matmul_f32_aligned_m : &ctx->pipeline_matmul_f32_m; +} + +static vk_pipeline* ggml_vk_guess_matmul_pipeline_intel(ggml_backend_vk_context * ctx, bool bit16_x, bool bit16_y, bool aligned) { +#ifdef GGML_VULKAN_DEBUG + std::cerr << " S" << std::endl; +#endif + if (bit16_x && bit16_y) { + return aligned ? &ctx->pipeline_matmul_f16_aligned_s : &ctx->pipeline_matmul_f16_s; + } + if (bit16_x && !bit16_y) { + return aligned ? &ctx->pipeline_matmul_f16_f32_aligned_s : &ctx->pipeline_matmul_f16_f32_s; + } + if (!bit16_x && bit16_y) { + GGML_ASSERT(false); + } + return aligned ? 
&ctx->pipeline_matmul_f32_aligned_s : &ctx->pipeline_matmul_f32_s; +} + +static vk_pipeline* ggml_vk_guess_matmul_pipeline(ggml_backend_vk_context * ctx, bool bit16_x, bool bit16_y, int m, int n, bool aligned) { +#ifdef GGML_VULKAN_DEBUG + std::cerr << "ggml_vk_guess_matmul_pipeline(" << bit16_x << ", " << bit16_y << ", " << m << ", " << n << ", " << aligned << ")"; +#endif + switch (ctx->device.lock()->vendor_id) { + case VK_VENDOR_ID_AMD: + return ggml_vk_guess_matmul_pipeline_amd(ctx, bit16_x, bit16_y, m, n, aligned); + case VK_VENDOR_ID_APPLE: + return ggml_vk_guess_matmul_pipeline_apple(ctx, bit16_x, bit16_y, aligned); + case VK_VENDOR_ID_INTEL: + return ggml_vk_guess_matmul_pipeline_intel(ctx, bit16_x, bit16_y, aligned); + } + + if (bit16_x && bit16_y) { + if (m <= 32 || n <= 32) { +#ifdef GGML_VULKAN_DEBUG + std::cerr << " S" << std::endl; +#endif + return aligned ? &ctx->pipeline_matmul_f16_aligned_s : &ctx->pipeline_matmul_f16_s; + } + if (m <= 64 || n <= 64) { #ifdef GGML_VULKAN_DEBUG std::cerr << " M" << std::endl; #endif @@ -2057,13 +2140,13 @@ static vk_pipeline* ggml_vk_guess_matmul_pipeline(ggml_backend_vk_context * ctx, return aligned ? &ctx->pipeline_matmul_f16_aligned_l : &ctx->pipeline_matmul_f16_l; } if (bit16_x && !bit16_y) { - if (ctx->device.lock()->vendor_id == VK_VENDOR_ID_INTEL || m <= 32 || n <= 32) { + if (m <= 32 || n <= 32) { #ifdef GGML_VULKAN_DEBUG std::cerr << " S" << std::endl; #endif return aligned ? 
&ctx->pipeline_matmul_f16_f32_aligned_s : &ctx->pipeline_matmul_f16_f32_s; } - if (ctx->device.lock()->subgroup_size == 64 || m <= 64 || n <= 64) { + if (m <= 64 || n <= 64) { #ifdef GGML_VULKAN_DEBUG std::cerr << " M" << std::endl; #endif @@ -2078,13 +2161,13 @@ static vk_pipeline* ggml_vk_guess_matmul_pipeline(ggml_backend_vk_context * ctx, GGML_ASSERT(false); } - if (ctx->device.lock()->vendor_id == VK_VENDOR_ID_INTEL || m <= 32 || n <= 32) { + if (m <= 32 || n <= 32) { #ifdef GGML_VULKAN_DEBUG std::cerr << " S" << std::endl; #endif return aligned ? &ctx->pipeline_matmul_f32_aligned_s : &ctx->pipeline_matmul_f32_s; } - if (ctx->device.lock()->subgroup_size == 64 || m <= 64 || n <= 64) { + if (m <= 64 || n <= 64) { #ifdef GGML_VULKAN_DEBUG std::cerr << " M" << std::endl; #endif From 97a336507ed9b971d72262bec7e2b8b7016a054a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 11 Feb 2024 00:17:31 +0000 Subject: [PATCH 86/94] flake.lock: Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flake lock file updates: • Updated input 'nixpkgs': 'github:NixOS/nixpkgs/b8b232ae7b8b144397fdb12d20f592e5e7c1a64d' (2024-01-31) → 'github:NixOS/nixpkgs/f8e2ebd66d097614d51a56a755450d4ae1632df1' (2024-02-07) --- flake.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flake.lock b/flake.lock index 8cfc78273..239d0686c 100644 --- a/flake.lock +++ b/flake.lock @@ -20,11 +20,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1706732774, - "narHash": "sha256-hqJlyJk4MRpcItGYMF+3uHe8HvxNETWvlGtLuVpqLU0=", + "lastModified": 1707268954, + "narHash": "sha256-2en1kvde3cJVc3ZnTy8QeD2oKcseLFjYPLKhIGDanQ0=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "b8b232ae7b8b144397fdb12d20f592e5e7c1a64d", + "rev": "f8e2ebd66d097614d51a56a755450d4ae1632df1", "type": "github" }, "original": { From 2891c8aa9af17f4ff636ff3868bc34ff72b56e25 Mon Sep 17 00:00:00 2001 From: Douglas Hanley Date: Sun, 11 Feb 2024 10:21:38 -0600 
Subject: [PATCH 87/94] Add support for BERT embedding models (#5423) * BERT model graph construction (build_bert) * WordPiece tokenizer (llm_tokenize_wpm) * Add flag for non-causal attention models * Allow for models that only output embeddings * Support conversion of BERT models to GGUF * Based on prior work by @xyzhang626 and @skeskinen --------- Co-authored-by: Jared Van Bortel Co-authored-by: Jared Van Bortel Co-authored-by: Georgi Gerganov --- .flake8 | 1 + convert-hf-to-gguf.py | 94 ++++++ examples/embedding/embedding.cpp | 12 +- gguf-py/gguf/constants.py | 43 +-- gguf-py/gguf/gguf_writer.py | 6 + gguf-py/gguf/tensor_mapping.py | 13 +- llama.cpp | 498 +++++++++++++++++++++++++++++-- llama.h | 1 + 8 files changed, 616 insertions(+), 52 deletions(-) diff --git a/.flake8 b/.flake8 index 113ca5fd3..18fba2c15 100644 --- a/.flake8 +++ b/.flake8 @@ -1,2 +1,3 @@ [flake8] max-line-length = 125 +ignore = W503 diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py index 0d4ea03b4..cae1551a2 100755 --- a/convert-hf-to-gguf.py +++ b/convert-hf-to-gguf.py @@ -209,6 +209,8 @@ class Model: return InternLM2Model if model_architecture == "MiniCPMForCausalLM": return MiniCPMModel + if model_architecture == "BertModel": + return BertModel return Model def _is_model_safetensors(self) -> bool: @@ -264,6 +266,8 @@ class Model: return gguf.MODEL_ARCH.INTERNLM2 if arch == "MiniCPMForCausalLM": return gguf.MODEL_ARCH.MINICPM + if arch == "BertModel": + return gguf.MODEL_ARCH.BERT raise NotImplementedError(f'Architecture "{arch}" not supported!') @@ -1629,6 +1633,96 @@ in chat mode so that the conversation can end normally.") self.post_write_tensors(tensor_map, name, data_torch) +class BertModel(Model): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.block_count = self.hparams["num_hidden_layers"] + + def set_gguf_parameters(self): + # TODO(cebtenzzre): merge with parent class + self.gguf_writer.add_name(self.dir_model.name) + 
self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"]) + self.gguf_writer.add_embedding_length(self.hparams["hidden_size"]) + self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"]) + self.gguf_writer.add_block_count(self.block_count) + self.gguf_writer.add_head_count(self.hparams["num_attention_heads"]) + self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_eps"]) + self.gguf_writer.add_causal_attention(False) + self.gguf_writer.add_file_type(self.ftype) + + def set_vocab(self): + path = self.dir_model + added_tokens_path = self.dir_model if self.dir_model.exists() else None + + # use huggingface vocab to get all tokens + vocab = HfVocab(path, added_tokens_path) + tokens, scores, toktypes = zip(*vocab.all_tokens()) + assert len(tokens) == vocab.vocab_size + + # we need this to validate the size of the token_type embeddings + # though currently we are passing all zeros to the token_type embeddings + n_token_types = len(set(toktypes)) + self.gguf_writer.add_token_type_count(n_token_types) + + # convert to phantom space vocab + def phantom(tok, typ): + if tok.startswith(b"[") and tok.endswith(b"]"): + return tok + if tok.startswith(b"##"): + return tok[2:] + return b"\xe2\x96\x81" + tok + tokens = [phantom(t, y) for t, y in zip(tokens, toktypes)] + + # set up bos and eos tokens (cls and sep) + self.gguf_writer.add_bos_token_id(vocab.tokenizer.cls_token_id) + self.gguf_writer.add_eos_token_id(vocab.tokenizer.sep_token_id) + + # add vocab to gguf + self.gguf_writer.add_tokenizer_model("bert") + self.gguf_writer.add_token_list(tokens) + self.gguf_writer.add_token_scores(scores) + self.gguf_writer.add_token_types(toktypes) + + # handle special tokens + special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens)) + special_vocab.add_to_gguf(self.gguf_writer) + + def write_tensors(self): + tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count) + tensors = dict(self.get_tensors()) + for name, 
data_torch in tensors.items(): + # we are only using BERT for embeddings so we don't need the pooling layer + if name in ("embeddings.position_ids", "pooler.dense.weight", "pooler.dense.bias"): + continue # we don't need these + + # map tensor names + new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) + if new_name is None: + print(f"Can not map tensor {name!r}") + sys.exit() + + data = data_torch.squeeze().numpy() + n_dims = len(data.shape) + new_dtype: type[np.floating[Any]] + + if ( + self.ftype == 1 and name.endswith(".weight") and n_dims == 2 + and name != "embeddings.token_type_embeddings.weight" # not used with get_rows, must be F32 + ): + # if f16 desired, convert any float32 2-dim weight tensors to float16 + new_dtype = np.float16 + else: + # if f32 desired, convert any float16 to float32 + new_dtype = np.float32 + + print(f"{new_name}, n_dims = {n_dims}, {data_torch.dtype} --> {new_dtype}") + + if data.dtype != new_dtype: + data = data.astype(new_dtype) + + self.gguf_writer.add_tensor(new_name, data) + + ###### CONVERSION LOGIC ###### diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp index 3295cd240..27376c8f0 100644 --- a/examples/embedding/embedding.cpp +++ b/examples/embedding/embedding.cpp @@ -87,7 +87,17 @@ int main(int argc, char ** argv) { } const int n_embd = llama_n_embd(model); - const auto * embeddings = llama_get_embeddings(ctx); + auto * embeddings = llama_get_embeddings(ctx); + + // l2-normalize embeddings + float norm = 0; + for (int i = 0; i < n_embd; i++) { + norm += embeddings[i] * embeddings[i]; + } + norm = sqrt(norm); + for (int i = 0; i < n_embd; i++) { + embeddings[i] /= norm; + } for (int i = 0; i < n_embd; i++) { printf("%f ", embeddings[i]); diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 1cfd41c0b..a9c13dd38 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -50,6 +50,7 @@ class Keys: VALUE_LENGTH = "{arch}.attention.value_length" 
LAYERNORM_EPS = "{arch}.attention.layer_norm_epsilon" LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon" + CAUSAL = "{arch}.attention.causal" class Rope: DIMENSION_COUNT = "{arch}.rope.dimension_count" @@ -60,22 +61,23 @@ class Keys: SCALING_FINETUNED = "{arch}.rope.scaling.finetuned" class Tokenizer: - MODEL = "tokenizer.ggml.model" - LIST = "tokenizer.ggml.tokens" - TOKEN_TYPE = "tokenizer.ggml.token_type" - SCORES = "tokenizer.ggml.scores" - MERGES = "tokenizer.ggml.merges" - BOS_ID = "tokenizer.ggml.bos_token_id" - EOS_ID = "tokenizer.ggml.eos_token_id" - UNK_ID = "tokenizer.ggml.unknown_token_id" - SEP_ID = "tokenizer.ggml.seperator_token_id" - PAD_ID = "tokenizer.ggml.padding_token_id" - ADD_BOS = "tokenizer.ggml.add_bos_token" - ADD_EOS = "tokenizer.ggml.add_eos_token" - ADD_PREFIX = "tokenizer.ggml.add_space_prefix" - HF_JSON = "tokenizer.huggingface.json" - RWKV = "tokenizer.rwkv.world" - CHAT_TEMPLATE = "tokenizer.chat_template" + MODEL = "tokenizer.ggml.model" + LIST = "tokenizer.ggml.tokens" + TOKEN_TYPE = "tokenizer.ggml.token_type" + TOKEN_TYPE_COUNT = "tokenizer.ggml.token_type_count" # for BERT-style token types + SCORES = "tokenizer.ggml.scores" + MERGES = "tokenizer.ggml.merges" + BOS_ID = "tokenizer.ggml.bos_token_id" + EOS_ID = "tokenizer.ggml.eos_token_id" + UNK_ID = "tokenizer.ggml.unknown_token_id" + SEP_ID = "tokenizer.ggml.seperator_token_id" + PAD_ID = "tokenizer.ggml.padding_token_id" + ADD_BOS = "tokenizer.ggml.add_bos_token" + ADD_EOS = "tokenizer.ggml.add_eos_token" + ADD_PREFIX = "tokenizer.ggml.add_space_prefix" + HF_JSON = "tokenizer.huggingface.json" + RWKV = "tokenizer.rwkv.world" + CHAT_TEMPLATE = "tokenizer.chat_template" # @@ -122,6 +124,7 @@ class MODEL_TENSOR(IntEnum): ATTN_OUT = auto() ATTN_NORM = auto() ATTN_NORM_2 = auto() + ATTN_OUT_NORM = auto() ATTN_ROT_EMBD = auto() FFN_GATE_INP = auto() FFN_NORM = auto() @@ -134,6 +137,7 @@ class MODEL_TENSOR(IntEnum): FFN_UP_EXP = auto() ATTN_Q_NORM = auto() ATTN_K_NORM = 
auto() + LAYER_OUT_NORM = auto() MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = { @@ -178,6 +182,7 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = { MODEL_TENSOR.ATTN_ROT_EMBD: "blk.{bid}.attn_rot_embd", MODEL_TENSOR.ATTN_Q_NORM: "blk.{bid}.attn_q_norm", MODEL_TENSOR.ATTN_K_NORM: "blk.{bid}.attn_k_norm", + MODEL_TENSOR.ATTN_OUT_NORM: "blk.{bid}.attn_output_norm", MODEL_TENSOR.FFN_GATE_INP: "blk.{bid}.ffn_gate_inp", MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm", MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate", @@ -187,6 +192,7 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = { MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate.{xid}", MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down.{xid}", MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up.{xid}", + MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm", } MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = { @@ -262,17 +268,18 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = { ], MODEL_ARCH.BERT: [ MODEL_TENSOR.TOKEN_EMBD, + MODEL_TENSOR.TOKEN_EMBD_NORM, MODEL_TENSOR.TOKEN_TYPES, MODEL_TENSOR.POS_EMBD, MODEL_TENSOR.OUTPUT_NORM, - MODEL_TENSOR.ATTN_NORM, + MODEL_TENSOR.ATTN_OUT_NORM, MODEL_TENSOR.ATTN_Q, MODEL_TENSOR.ATTN_K, MODEL_TENSOR.ATTN_V, MODEL_TENSOR.ATTN_OUT, - MODEL_TENSOR.FFN_NORM, MODEL_TENSOR.FFN_DOWN, MODEL_TENSOR.FFN_UP, + MODEL_TENSOR.LAYER_OUT_NORM, ], MODEL_ARCH.MPT: [ MODEL_TENSOR.TOKEN_EMBD, diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index 16808196e..7af58a46c 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -357,6 +357,9 @@ class GGUFWriter: def add_layer_norm_rms_eps(self, value: float) -> None: self.add_float32(Keys.Attention.LAYERNORM_RMS_EPS.format(arch=self.arch), value) + def add_causal_attention(self, value: bool) -> None: + self.add_bool(Keys.Attention.CAUSAL.format(arch=self.arch), value) + def add_rope_dimension_count(self, count: int) -> None: self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count) @@ -387,6 +390,9 @@ class 
GGUFWriter: def add_token_types(self, types: Sequence[TokenType] | Sequence[int]) -> None: self.add_array(Keys.Tokenizer.TOKEN_TYPE, types) + def add_token_type_count(self, value: int) -> None: + self.add_uint32(Keys.Tokenizer.TOKEN_TYPE_COUNT, value) + def add_token_scores(self, scores: Sequence[float]) -> None: self.add_array(Keys.Tokenizer.SCORES, scores) diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py index 4f16d8504..c7ba1420e 100644 --- a/gguf-py/gguf/tensor_mapping.py +++ b/gguf-py/gguf/tensor_mapping.py @@ -30,6 +30,7 @@ class TensorNameMap: # Normalization of token embeddings MODEL_TENSOR.TOKEN_EMBD_NORM: ( "word_embeddings_layernorm", # bloom + "embeddings.LayerNorm", # bert ), # Position embeddings @@ -54,7 +55,6 @@ class TensorNameMap: "transformer.ln_f", # gpt2 gpt-j falcon "model.norm", # llama-hf baichuan internlm2 "norm", # llama-pth - "embeddings.LayerNorm", # bert "transformer.norm_f", # mpt "ln_f", # refact bloom qwen gpt2 "language_model.encoder.final_layernorm", # persimmon @@ -79,7 +79,6 @@ class TensorNameMap: "transformer.h.{bid}.ln_mlp", # falcon40b "model.layers.{bid}.input_layernorm", # llama-hf "layers.{bid}.attention_norm", # llama-pth - "encoder.layer.{bid}.attention.output.LayerNorm", # bert "language_model.encoder.layers.{bid}.input_layernorm", # persimmon "model.layers.{bid}.ln1", # yi "h.{bid}.ln_1", # gpt2 @@ -155,6 +154,11 @@ class TensorNameMap: "model.layers.{bid}.attention.wo", # internlm2 ), + # Attention output norm + MODEL_TENSOR.ATTN_OUT_NORM: ( + "encoder.layer.{bid}.attention.output.LayerNorm", # bert + ), + # Rotary embeddings MODEL_TENSOR.ATTN_ROT_EMBD: ( "model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf @@ -171,7 +175,6 @@ class TensorNameMap: "transformer.blocks.{bid}.norm_2", # mpt "model.layers.{bid}.post_attention_layernorm", # llama-hf "layers.{bid}.ffn_norm", # llama-pth - "encoder.layer.{bid}.output.LayerNorm", # bert 
"language_model.encoder.layers.{bid}.post_attention_layernorm", # persimmon "model.layers.{bid}.ln2", # yi "h.{bid}.ln_2", # gpt2 @@ -266,6 +269,10 @@ class TensorNameMap: MODEL_TENSOR.ROPE_FREQS: ( "language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq", # persimmon ), + + MODEL_TENSOR.LAYER_OUT_NORM: ( + "encoder.layer.{bid}.output.LayerNorm", # bert + ) } mapping: dict[str, tuple[MODEL_TENSOR, str]] diff --git a/llama.cpp b/llama.cpp index 3f39a67fb..d1ee26ce2 100644 --- a/llama.cpp +++ b/llama.cpp @@ -196,6 +196,7 @@ enum llm_arch { LLM_ARCH_STARCODER, LLM_ARCH_PERSIMMON, LLM_ARCH_REFACT, + LLM_ARCH_BERT, LLM_ARCH_BLOOM, LLM_ARCH_STABLELM, LLM_ARCH_QWEN, @@ -220,6 +221,7 @@ static std::map LLM_ARCH_NAMES = { { LLM_ARCH_STARCODER, "starcoder" }, { LLM_ARCH_PERSIMMON, "persimmon" }, { LLM_ARCH_REFACT, "refact" }, + { LLM_ARCH_BERT, "bert" }, { LLM_ARCH_BLOOM, "bloom" }, { LLM_ARCH_STABLELM, "stablelm" }, { LLM_ARCH_QWEN, "qwen" }, @@ -261,6 +263,7 @@ enum llm_kv { LLM_KV_ATTENTION_VALUE_LENGTH, LLM_KV_ATTENTION_LAYERNORM_EPS, LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, + LLM_KV_ATTENTION_CAUSAL, LLM_KV_ROPE_DIMENSION_COUNT, LLM_KV_ROPE_FREQ_BASE, @@ -273,6 +276,7 @@ enum llm_kv { LLM_KV_TOKENIZER_MODEL, LLM_KV_TOKENIZER_LIST, LLM_KV_TOKENIZER_TOKEN_TYPE, + LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, LLM_KV_TOKENIZER_SCORES, LLM_KV_TOKENIZER_MERGES, LLM_KV_TOKENIZER_BOS_ID, @@ -316,6 +320,7 @@ static std::map LLM_KV_NAMES = { { LLM_KV_ATTENTION_VALUE_LENGTH, "%s.attention.value_length" }, { LLM_KV_ATTENTION_LAYERNORM_EPS, "%s.attention.layer_norm_epsilon" }, { LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, "%s.attention.layer_norm_rms_epsilon" }, + { LLM_KV_ATTENTION_CAUSAL, "%s.attention.causal" }, { LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" }, { LLM_KV_ROPE_FREQ_BASE, "%s.rope.freq_base" }, @@ -328,6 +333,7 @@ static std::map LLM_KV_NAMES = { { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" }, { LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" }, { 
LLM_KV_TOKENIZER_TOKEN_TYPE, "tokenizer.ggml.token_type" }, + { LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, "tokenizer.ggml.token_type_count" }, { LLM_KV_TOKENIZER_SCORES, "tokenizer.ggml.scores" }, { LLM_KV_TOKENIZER_MERGES, "tokenizer.ggml.merges" }, { LLM_KV_TOKENIZER_BOS_ID, "tokenizer.ggml.bos_token_id" }, @@ -355,6 +361,7 @@ struct LLM_KV { enum llm_tensor { LLM_TENSOR_TOKEN_EMBD, LLM_TENSOR_TOKEN_EMBD_NORM, + LLM_TENSOR_TOKEN_TYPES, LLM_TENSOR_POS_EMBD, LLM_TENSOR_OUTPUT, LLM_TENSOR_OUTPUT_NORM, @@ -536,6 +543,23 @@ static std::map> LLM_TENSOR_NAMES = { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" }, }, }, + { + LLM_ARCH_BERT, + { + { LLM_TENSOR_TOKEN_EMBD, "token_embd" }, + { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" }, + { LLM_TENSOR_TOKEN_TYPES, "token_types" }, + { LLM_TENSOR_POS_EMBD, "position_embd" }, + { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_output_norm" }, + { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" }, + { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" }, + { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" }, + { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" }, + { LLM_TENSOR_FFN_NORM, "blk.%d.layer_output_norm" }, + { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" }, + { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" }, + }, + }, { LLM_ARCH_BLOOM, { @@ -1440,6 +1464,11 @@ static llama_state g_state; // available llama models enum e_model { MODEL_UNKNOWN, + MODEL_17M, + MODEL_22M, + MODEL_33M, + MODEL_109M, + MODEL_335M, MODEL_0_5B, MODEL_1B, MODEL_2B, @@ -1481,6 +1510,7 @@ struct llama_hparams { uint32_t n_ff; uint32_t n_expert = 0; uint32_t n_expert_used = 0; + uint32_t n_vocab_type = 0; // for BERT-style token types float f_norm_eps; float f_norm_rms_eps; @@ -1493,6 +1523,8 @@ struct llama_hparams { float f_clamp_kqv; float f_max_alibi_bias; + bool causal_attn = true; + bool operator!=(const llama_hparams & other) const { if (this->vocab_only != other.vocab_only) return true; @@ -1720,6 +1752,7 @@ struct llama_model { llama_vocab vocab; struct ggml_tensor * tok_embd; + struct ggml_tensor * type_embd; struct 
ggml_tensor * pos_embd; struct ggml_tensor * tok_norm; struct ggml_tensor * tok_norm_b; @@ -1850,6 +1883,7 @@ struct llama_context { struct ggml_tensor * inp_pos; // I32 [n_batch] struct ggml_tensor * inp_KQ_mask; // F32 [n_ctx, n_batch] struct ggml_tensor * inp_K_shift; // I32 [n_ctx] + struct ggml_tensor * inp_sum; // F32 [1, n_batch] #ifdef GGML_USE_MPI ggml_mpi_context * ctx_mpi = NULL; @@ -2829,6 +2863,7 @@ static const char * llama_model_vocab_type_name(enum llama_vocab_type type){ switch (type) { case LLAMA_VOCAB_TYPE_SPM: return "SPM"; case LLAMA_VOCAB_TYPE_BPE: return "BPE"; + case LLAMA_VOCAB_TYPE_WPM: return "WPM"; default: return "unknown"; } } @@ -3000,6 +3035,26 @@ static void llm_load_hparams( default: model.type = e_model::MODEL_UNKNOWN; } } break; + case LLM_ARCH_BERT: + { + ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps); + ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn); + ml.get_key(LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, hparams.n_vocab_type); + + switch (hparams.n_layer) { + case 3: + model.type = e_model::MODEL_17M; break; // bge-micro + case 6: + model.type = e_model::MODEL_22M; break; // MiniLM-L6 + case 12: + switch (hparams.n_embd) { + case 384: model.type = e_model::MODEL_33M; break; // MiniLM-L12, bge-small + case 768: model.type = e_model::MODEL_109M; break; // bge-base + } break; + case 24: + model.type = e_model::MODEL_335M; break; // bge-large + } + } break; case LLM_ARCH_BLOOM: { ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps); @@ -3204,6 +3259,16 @@ static void llm_load_vocab( vocab.special_unk_id = -1; vocab.special_sep_id = -1; vocab.special_pad_id = -1; + } else if (tokenizer_name == "bert") { + vocab.type = LLAMA_VOCAB_TYPE_WPM; + + // default special tokens + vocab.special_bos_id = 101; + vocab.special_eos_id = 102; + vocab.special_unk_id = 100; + vocab.special_sep_id = -1; + vocab.special_pad_id = -1; + vocab.add_space_prefix = false; } else { LLAMA_LOG_WARN("%s: unknown tokenizer: '%s'", 
__func__, tokenizer_name.c_str()); LLAMA_LOG_WARN("%s: using default tokenizer: 'llama'", __func__); @@ -3232,6 +3297,8 @@ static void llm_load_vocab( // determine the newline token: LLaMA "<0x0A>" == 10 == '\n', Falcon 193 == '\n' if (vocab.type == LLAMA_VOCAB_TYPE_SPM) { vocab.linefeed_id = llama_byte_to_token(vocab, '\n'); + } else if (vocab.type == LLAMA_VOCAB_TYPE_WPM) { + vocab.linefeed_id = vocab.special_pad_id; } else { const std::vector ids = llama_tokenize_internal(vocab, "\u010A", false); GGML_ASSERT(!ids.empty() && "model vocab missing newline token"); @@ -3569,6 +3636,7 @@ static bool llm_load_tensors( const int64_t n_embd_v_gqa = hparams.n_embd_v_gqa(); const int64_t n_embd_gqa = n_embd_v_gqa; const int64_t n_vocab = hparams.n_vocab; + const int64_t n_vocab_type = hparams.n_vocab_type; const int64_t n_ff = hparams.n_ff; GGML_ASSERT(n_embd_gqa == n_embd_k_gqa); @@ -3783,11 +3851,50 @@ static bool llm_load_tensors( layer.attn_k_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_K_NORM, "bias", i), {64}); } } break; - case LLM_ARCH_BLOOM: + case LLM_ARCH_BERT: { model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}); - model.tok_norm = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}); - model.tok_norm_b = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias"), {n_embd}); + model.type_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_TYPES, "weight"), {n_embd, n_vocab_type}); + model.pos_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_POS_EMBD, "weight"), {n_embd, hparams.n_ctx_train}); + model.tok_norm = ml.create_tensor(ctx_output, tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}); + model.tok_norm_b = ml.create_tensor(ctx_output, tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias"), {n_embd}); + + for (int i = 0; i < n_layer; ++i) { + ggml_context * ctx_layer = ctx_for_layer(i); + ggml_context * ctx_split = ctx_for_layer_split(i); + + auto & layer = model.layers[i]; 
+ + layer.attn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}); + layer.attn_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM, "bias", i), {n_embd}); + + layer.ffn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}); + layer.ffn_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_NORM, "bias", i), {n_embd}); + + layer.wq = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd}); + layer.bq = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_Q, "bias", i), {n_embd}); + + layer.wk = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_gqa}); + layer.bk = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_K, "bias", i), {n_embd_gqa}); + + layer.wv = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_gqa}); + layer.bv = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_V, "bias", i), {n_embd_gqa}); + + layer.wo = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}); + layer.bo = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT, "bias", i), {n_embd}); + + layer.ffn_up = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}); + layer.ffn_up_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_UP, "bias", i), {n_ff}); + + layer.ffn_down = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd}); + layer.ffn_down_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}); + } + } break; + case LLM_ARCH_BLOOM: + { + model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}); + model.tok_norm = ml.create_tensor(ctx_output, tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}); + model.tok_norm_b = ml.create_tensor(ctx_output, tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias"), {n_embd}); // output { @@ -4739,6 +4846,7 @@ struct llm_build_context { const int32_t n_orig_ctx; const bool do_rope_shift; 
+ const bool causal_attn; const llm_build_cb & cb; @@ -4782,6 +4890,7 @@ struct llm_build_context { kv_head (worst_case ? n_ctx - n_tokens : kv_self.head), n_orig_ctx (cparams.n_yarn_orig_ctx), do_rope_shift (worst_case || kv_self.has_shift), + causal_attn (hparams.causal_attn), cb (cb), buf_compute_meta (lctx.buf_compute_meta) { // all initializations should be done in init() @@ -5625,6 +5734,100 @@ struct llm_build_context { return gf; } + struct ggml_cgraph * build_bert() { + struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false); + + const int64_t n_embd_head = hparams.n_embd_head_v; + GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); + GGML_ASSERT(n_embd_head == hparams.n_rot); + + struct ggml_tensor * cur; + struct ggml_tensor * inpL; + + // get input vectors with right size + struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0); + struct ggml_tensor * inp_sum = ggml_view_1d(ctx0, lctx.inp_sum, n_tokens, 0); + + // construct input embeddings (token, type, position) + inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); + // token types are hardcoded to zero ("Sentence A") + struct ggml_tensor * type_row0 = ggml_view_1d(ctx0, model.type_embd, n_embd, 0); + inpL = ggml_add(ctx0, inpL, type_row0); + inpL = ggml_add(ctx0, ggml_get_rows(ctx0, model.pos_embd, inp_pos), inpL); + cb(inpL, "inp_embd", -1); + + // embed layer norm + inpL = llm_build_norm(ctx0, inpL, hparams, model.tok_norm, model.tok_norm_b, LLM_NORM, cb, -1); + cb(inpL, "inp_norm", -1); + + // KQ_mask (mask for 1 head, it will be broadcasted to all heads) + struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0); + cb(KQ_mask, "KQ_mask", -1); // [n_kv, n_tokens] + + // iterate layers + for (int il = 0; il < n_layer; ++il) { + struct ggml_tensor * cur = inpL; + + // self-attention + { + struct ggml_tensor * Qcur = ggml_add(ctx0, 
ggml_mul_mat(ctx0, model.layers[il].wq, cur), model.layers[il].bq); + cb(Qcur, "Qcur", il); + + struct ggml_tensor * Kcur = ggml_add(ctx0, ggml_mul_mat(ctx0, model.layers[il].wk, cur), model.layers[il].bk); + cb(Kcur, "Kcur", il); + + struct ggml_tensor * Vcur = ggml_add(ctx0, ggml_mul_mat(ctx0, model.layers[il].wv, cur), model.layers[il].bv); + cb(Vcur, "Vcur", il); + + // seems like we just need to do this for Q? + Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); + + cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, + model.layers[il].wo, model.layers[il].bo, + Kcur, Vcur, Qcur, KQ_mask, n_ctx, n_tokens, kv_head, n_kv, -1.0f, 1.0f/sqrtf(float(n_embd_head)), cb, il); + cb(cur, "kqv_out", il); + } + + // re-add the layer input + cur = ggml_add(ctx0, cur, inpL); + + // attention layer norm + cur = llm_build_norm(ctx0, cur, hparams, model.layers[il].attn_norm, model.layers[il].attn_norm_b, LLM_NORM, cb, il); + + struct ggml_tensor * ffn_inp = cur; + cb(ffn_inp, "ffn_inp", il); + + // feed-forward network + cur = llm_build_ffn(ctx0, cur, + model.layers[il].ffn_up, model.layers[il].ffn_up_b, + NULL, NULL, + model.layers[il].ffn_down, model.layers[il].ffn_down_b, + NULL, + LLM_FFN_GELU, LLM_FFN_SEQ, cb, il); + cb(cur, "ffn_out", il); + + // attentions bypass the intermediate layer + cur = ggml_add(ctx0, cur, ffn_inp); + + // output layer norm + cur = llm_build_norm(ctx0, cur, hparams, model.layers[il].ffn_norm, model.layers[il].ffn_norm_b, LLM_NORM, cb, il); + + // input for next layer + inpL = cur; + } + + // final output + cur = inpL; + + // pooling + cur = ggml_mul_mat(ctx0, inp_sum, ggml_cont(ctx0, ggml_transpose(ctx0, cur))); + cb(cur, "result_embed", -1); + + ggml_build_forward_expand(gf, cur); + + return gf; + } + struct ggml_cgraph * build_bloom() { struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false); @@ -7060,7 +7263,8 @@ static struct ggml_cgraph * llama_build_graph( for (int i = 0; i < n_kv; ++i) { float f; - if 
(!lctx.kv_self.cells[i].has_seq_id(seq_id) || lctx.kv_self.cells[i].pos > pos) { + if (!lctx.kv_self.cells[i].has_seq_id(seq_id) || + (llm.causal_attn && lctx.kv_self.cells[i].pos > pos)) { f = -INFINITY; } else { f = 0; @@ -7081,6 +7285,15 @@ static struct ggml_cgraph * llama_build_graph( data[i] = lctx.kv_self.cells[i].delta; } } + + { + GGML_ASSERT(ggml_backend_buffer_is_host(lctx.inp_sum->buffer)); + float * data = (float *) lctx.inp_sum->data; + + for (int i = 0; i < batch.n_tokens; ++i) { + data[i] = 1.0f/float(batch.n_tokens); + } + } } llm.init(); @@ -7110,6 +7323,10 @@ static struct ggml_cgraph * llama_build_graph( { result = llm.build_refact(); } break; + case LLM_ARCH_BERT: + { + result = llm.build_bert(); + } break; case LLM_ARCH_BLOOM: { result = llm.build_bloom(); @@ -7269,13 +7486,18 @@ static int llama_decode_internal( // the output is always the last tensor in the graph struct ggml_tensor * res = gf->nodes[gf->n_nodes - 1]; - GGML_ASSERT(strcmp(res->name, "result_output") == 0); - - // the embeddings could be the second to last tensor, or the third to last tensor struct ggml_tensor * embeddings = gf->nodes[gf->n_nodes - 2]; - if (strcmp(embeddings->name, "result_norm") != 0) { - embeddings = gf->nodes[gf->n_nodes - 3]; - GGML_ASSERT(strcmp(embeddings->name, "result_norm") == 0); + if (strcmp(res->name, "result_output") == 0) { + // the embeddings could be the second to last tensor, or the third to last tensor + if (strcmp(embeddings->name, "result_norm") != 0) { + embeddings = gf->nodes[gf->n_nodes - 3]; + GGML_ASSERT(strcmp(embeddings->name, "result_norm") == 0); + } + } else if (strcmp(res->name, "result_embed") == 0) { + embeddings = res; + res = nullptr; + } else { + GGML_ASSERT(false); } // LLAMA_LOG_INFO("graph build time: %.3f ms (%d nodes, %d leafs)\n", (ggml_time_us() - t_start_us)/1000.0, gf->n_nodes, gf->n_leafs); @@ -7344,7 +7566,7 @@ static int llama_decode_internal( // extract logits // TODO: do not compute and extract logits if only 
embeddings are needed // need to update the graphs to skip "result_output" - { + if (res) { auto & logits_out = lctx.logits; #ifndef NDEBUG @@ -7388,9 +7610,11 @@ static int llama_decode_internal( if (!lctx.embedding.empty()) { auto & embedding_out = lctx.embedding; + const int64_t embed_pos = res ? n_embd * (n_tokens-1) : 0; + embedding_out.resize(n_embd); ggml_backend_t embeddings_backend = ggml_backend_sched_get_node_backend(lctx.sched, embeddings); - ggml_backend_tensor_get_async(embeddings_backend, embeddings, embedding_out.data(), (n_embd*(n_tokens - 1))*sizeof(float), n_embd*sizeof(float)); + ggml_backend_tensor_get_async(embeddings_backend, embeddings, embedding_out.data(), embed_pos*sizeof(float), n_embd*sizeof(float)); ggml_backend_synchronize(embeddings_backend); } @@ -7454,6 +7678,9 @@ static uint8_t llama_token_to_byte(const llama_vocab& vocab, llama_token id) { GGML_ASSERT(false); return unicode_to_bytes_bpe(token_data.text); } + case LLAMA_VOCAB_TYPE_WPM: { + GGML_ASSERT(false); + } default: GGML_ASSERT(false); } @@ -7466,6 +7693,7 @@ static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch) { const char buf[7] = { '<', '0', 'x', hex[ch >> 4], hex[ch & 15], '>', 0 }; return vocab.token_to_id.at(buf); } + case LLAMA_VOCAB_TYPE_WPM: case LLAMA_VOCAB_TYPE_BPE: { return vocab.token_to_id.at(bytes_to_unicode_bpe(ch)); } @@ -7936,12 +8164,212 @@ private: llm_bigram_bpe::queue work_queue; }; -typedef enum FRAGMENT_BUFFER_VARIANT_TYPE{ +struct llm_tokenizer_wpm { + llm_tokenizer_wpm(const llama_vocab & vocab): vocab(vocab) {} + + void tokenize(const std::string & text, std::vector & output) { + auto * token_map = &vocab.token_to_id; + + // normalize and split by whitespace + std::vector words = preprocess(text); + + // bos token prepended already + + // find the longest tokens that form the words + for (const std::string &word : words) { + // skip empty words + if (word.size() == 0) { + continue; + } + + // prepend phantom space + 
std::string word1 = "\xe2\x96\x81" + word; + int n = word1.size(); + + // we're at the start of a new word + int i = 0; + bool match_any = false; + + // move through character position in word + while (i < n) { + // loop through possible match length + bool match = false; + for (int j = n; j > i; j--) { + auto it = token_map->find(word1.substr(i, j - i)); + if (it != token_map->end()) { + output.push_back(it->second); + match = true; + match_any = true; + i = j; + break; + } + } + + // must be an unknown character + if (!match) { + i++; + } + } + + // we didn't find any matches for this word + if (!match_any) { + output.push_back(vocab.special_unk_id); + } + } + + // append eos token + output.push_back(vocab.special_eos_id); + } + + std::vector preprocess(const std::string & text) { + std::string ori_str = normalize(text); + uint64_t ori_size = ori_str.size(); + + // single punct / single symbol / single digit + // baseline: add whitespace on the left and right of punct and chinese characters + std::vector words; + std::string new_str = ""; + uint64_t i = 0; + while (i < ori_size) { + int utf_char_len = utf8_len(ori_str[i]); + if ((utf_char_len == 1) && ispunct(ori_str[i])) { + new_str += " "; + new_str += ori_str[i]; + new_str += " "; + i += 1; + } + else if ((utf_char_len == 3) && is_chinese_char(ori_str.substr(i, 3))) { + new_str += " "; + new_str += ori_str.substr(i, 3); + new_str += " "; + i += 3; + } + else { + new_str += ori_str[i]; + i += 1; + } + } + + // split by whitespace + uint64_t l = 0; + uint64_t r = 0; + while (r < new_str.size()) { + // if is whitespace + if (isspace(new_str[r])) { + if (r > l) words.push_back(new_str.substr(l, (r - l))); + l = r + 1; + r = l; + } + else { + r += 1; + } + } + if (r > l) { + words.push_back(new_str.substr(l, (r - l))); + } + return words; + } + + std::string normalize(const std::string & text) { + // TODO: handle chinese characters? 
https://github.com/huggingface/tokenizers/blob/ef5f50605ddf9f8caef1598c0e4853862b9707a7/tokenizers/src/normalizers/bert.rs#L98 + std::string text2 = strip_accents(text); + for (size_t i = 0; i < text2.size(); i += utf8_len(text2[i])) { + char c = text2[i]; + if (c >= 'A' && c <= 'Z') { + text2[i] = c - 'A' + 'a'; + } + } + return text2; + } + + bool is_chinese_char(const std::string & str) { + int len = str.length(); + unsigned int codepoint = 0; + int num_bytes = 0; + int i = 0; + unsigned char ch = static_cast(str[i]); + if (ch <= 0x7f) { + codepoint = ch; + num_bytes = 1; + } else if ((ch >> 5) == 0x06) { + codepoint = ch & 0x1f; + num_bytes = 2; + } else if ((ch >> 4) == 0x0e) { + codepoint = ch & 0x0f; + num_bytes = 3; + } else if ((ch >> 3) == 0x1e) { + codepoint = ch & 0x07; + num_bytes = 4; + } + for (int j = 1; j < num_bytes; ++j) { + if (i + j >= len) { + return false; // incomplete UTF-8 character + } + unsigned char next_ch = static_cast(str[i + j]); + if ((next_ch >> 6) != 0x02) { + return false; // invalid trailing byte + } + codepoint = (codepoint << 6) | (next_ch & 0x3f); + } + if ((codepoint >= 0x4E00 && codepoint <= 0x9FFF) || + (codepoint >= 0x3400 && codepoint <= 0x4DBF) || + (codepoint >= 0x20000 && codepoint <= 0x2A6DF) || + (codepoint >= 0x2A700 && codepoint <= 0x2B73F) || + (codepoint >= 0x2B740 && codepoint <= 0x2B81F) || + (codepoint >= 0x2B920 && codepoint <= 0x2CEAF) || // this should be 0x2B820 but in hf rust code it is 0x2B920 + (codepoint >= 0xF900 && codepoint <= 0xFAFF) || + (codepoint >= 0x2F800 && codepoint <= 0x2FA1F) || + (codepoint >= 0x3000 && codepoint <= 0x303F) || + (codepoint >= 0xFF00 && codepoint <= 0xFFEF)) { + return true; // NOLINT + } + return false; + } + + std::string strip_accents(const std::string & input_string) { + std::string resultString; + std::map accent_map = { + {"À", 'A'}, {"Á", 'A'}, {"Â", 'A'}, {"Ã", 'A'}, {"Ä", 'A'}, {"Å", 'A'}, + {"à", 'a'}, {"á", 'a'}, {"â", 'a'}, {"ã", 'a'}, {"ä", 'a'}, {"å", 'a'}, 
+ {"È", 'E'}, {"É", 'E'}, {"Ê", 'E'}, {"Ë", 'E'}, {"è", 'e'}, {"é", 'e'}, + {"ê", 'e'}, {"ë", 'e'}, {"Ì", 'I'}, {"Í", 'I'}, {"Î", 'I'}, {"Ï", 'I'}, + {"ì", 'i'}, {"í", 'i'}, {"î", 'i'}, {"ï", 'i'}, {"Ò", 'O'}, {"Ó", 'O'}, + {"Ô", 'O'}, {"Õ", 'O'}, {"Ö", 'O'}, {"ò", 'o'}, {"ó", 'o'}, {"ô", 'o'}, + {"õ", 'o'}, {"ö", 'o'}, {"Ù", 'U'}, {"Ú", 'U'}, {"Û", 'U'}, {"Ü", 'U'}, + {"ù", 'u'}, {"ú", 'u'}, {"û", 'u'}, {"ü", 'u'}, {"Ý", 'Y'}, {"ý", 'y'}, + {"Ç", 'C'}, {"ç", 'c'}, {"Ñ", 'N'}, {"ñ", 'n'}, + }; + + for (size_t i = 0; i < input_string.length();) { + int len = utf8_len(input_string[i]); + std::string curChar = input_string.substr(i, len); + auto iter = accent_map.find(curChar); + if (iter != accent_map.end()) { + resultString += iter->second; + } else { + resultString += curChar; + } + i += len; + } + + return resultString; + } + + static size_t utf8_len(char src) { + const size_t lookup[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4}; + uint8_t highbits = static_cast(src) >> 4; + return lookup[highbits]; + } + + const llama_vocab & vocab; +}; + +typedef enum FRAGMENT_BUFFER_VARIANT_TYPE { FRAGMENT_BUFFER_VARIANT_TYPE_TOKEN, FRAGMENT_BUFFER_VARIANT_TYPE_RAW_TEXT } FRAGMENT_BUFFER_VARIANT_TYPE; -struct fragment_buffer_variant{ +struct fragment_buffer_variant { fragment_buffer_variant(llama_vocab::id _token) : type(FRAGMENT_BUFFER_VARIANT_TYPE_TOKEN), @@ -7971,8 +8399,7 @@ struct fragment_buffer_variant{ // #define PRETOKENIZERDEBUG -static void tokenizer_st_partition(const llama_vocab & vocab, std::forward_list & buffer) -{ +static void tokenizer_st_partition(const llama_vocab & vocab, std::forward_list & buffer) { // for each special token for (const auto & st: vocab.special_tokens_cache) { const auto & special_token = st.first; @@ -8090,10 +8517,8 @@ static std::vector llama_tokenize_internal(const llama_vocab & switch (vocab.type) { case LLAMA_VOCAB_TYPE_SPM: { - for (const auto & fragment: fragment_buffer) - { - if (fragment.type == 
FRAGMENT_BUFFER_VARIANT_TYPE_RAW_TEXT) - { + for (const auto & fragment: fragment_buffer) { + if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_RAW_TEXT) { // without adding this leading whitespace, we do not get the same results as the original tokenizer // TODO: It's likely possible to get rid of this string copy entirely @@ -8113,19 +8538,15 @@ static std::vector llama_tokenize_internal(const llama_vocab & llm_tokenizer_spm tokenizer(vocab); llama_escape_whitespace(raw_text); tokenizer.tokenize(raw_text, output); - } - else // if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_TOKEN) - { + } else { // if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_TOKEN) output.push_back(fragment.token); } } } break; case LLAMA_VOCAB_TYPE_BPE: { - for (const auto & fragment: fragment_buffer) - { - if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_RAW_TEXT) - { + for (const auto & fragment: fragment_buffer) { + if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_RAW_TEXT) { auto raw_text = fragment.raw_text.substr(fragment.offset, fragment.length); #ifdef PRETOKENIZERDEBUG @@ -8133,9 +8554,23 @@ static std::vector llama_tokenize_internal(const llama_vocab & #endif llm_tokenizer_bpe tokenizer(vocab); tokenizer.tokenize(raw_text, output); + } else { // if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_TOKEN) + output.push_back(fragment.token); } - else // if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_TOKEN) - { + } + } break; + case LLAMA_VOCAB_TYPE_WPM: + { + for (const auto & fragment: fragment_buffer) { + if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_RAW_TEXT) { + auto raw_text = fragment.raw_text.substr(fragment.offset, fragment.length); + +#ifdef PRETOKENIZERDEBUG + LLAMA_LOG_WARN("TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str()); +#endif + llm_tokenizer_wpm tokenizer(vocab); + tokenizer.tokenize(raw_text, output); + } else { // if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_TOKEN) output.push_back(fragment.token); } 
} @@ -10799,7 +11234,7 @@ struct llama_context * llama_new_context_with_model( // graph inputs { ggml_init_params init_params = { - /* .mem_size */ ggml_tensor_overhead()*5, + /* .mem_size */ ggml_tensor_overhead()*7, /* .mem_buffer */ nullptr, /* .no_alloc */ true, }; @@ -10810,12 +11245,14 @@ struct llama_context * llama_new_context_with_model( ctx->inp_pos = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_I32, cparams.n_batch); ctx->inp_KQ_mask = ggml_new_tensor_2d(ctx->ctx_input, GGML_TYPE_F32, cparams.n_ctx, cparams.n_batch); ctx->inp_K_shift = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_I32, cparams.n_ctx); + ctx->inp_sum = ggml_new_tensor_2d(ctx->ctx_input, GGML_TYPE_F32, 1, cparams.n_batch); ggml_set_name(ctx->inp_tokens, "inp_tokens"); ggml_set_name(ctx->inp_embd, "inp_embd"); ggml_set_name(ctx->inp_pos, "inp_pos"); ggml_set_name(ctx->inp_KQ_mask, "inp_KQ_mask"); ggml_set_name(ctx->inp_K_shift, "inp_K_shift"); + ggml_set_name(ctx->inp_sum, "inp_sum"); ctx->buf_input = ggml_backend_alloc_ctx_tensors_from_buft(ctx->ctx_input, llama_default_buffer_type_cpu(true)); @@ -11746,6 +12183,7 @@ static std::string llama_decode_text(const std::string & text) { int32_t llama_token_to_piece(const struct llama_model * model, llama_token token, char * buf, int32_t length) { if (0 <= token && token < llama_n_vocab(model)) { switch (llama_vocab_get_type(model->vocab)) { + case LLAMA_VOCAB_TYPE_WPM: case LLAMA_VOCAB_TYPE_SPM: { // NOTE: we accept all unsupported token types, // suppressing them like CONTROL tokens. 
diff --git a/llama.h b/llama.h index cec4158bc..367e8f1a1 100644 --- a/llama.h +++ b/llama.h @@ -61,6 +61,7 @@ extern "C" { enum llama_vocab_type { LLAMA_VOCAB_TYPE_SPM = 0, // SentencePiece LLAMA_VOCAB_TYPE_BPE = 1, // Byte Pair Encoding + LLAMA_VOCAB_TYPE_WPM = 2, // WordPiece }; enum llama_token_type { From 3bdc4cd0f595a6096cca4a64aa75ffa8a3503465 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Sun, 11 Feb 2024 19:08:39 +0100 Subject: [PATCH 88/94] CUDA: mul_mat_vec_q tiling, refactor mul mat logic (#5434) * CUDA: mul_mat_vec_q tiling, refactor mul mat logic Co-authored-by: slaren --------- Co-authored-by: slaren --- ggml-cuda.cu | 265 +++++++++++++++++++++++++++++---------------------- 1 file changed, 149 insertions(+), 116 deletions(-) diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 5053757e6..96976f248 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -150,8 +150,8 @@ #define CUDA_USE_TENSOR_CORES #endif -// max batch size to use MMQ kernels when tensor cores are available -#define MMQ_MAX_BATCH_SIZE 32 +#define MMVQ_MAX_BATCH_SIZE 8 // max batch size to use MMVQ kernels +#define MMQ_MAX_BATCH_SIZE 32 // max batch size to use MMQ kernels when tensor cores are available #if defined(GGML_USE_HIPBLAS) #define __CUDA_ARCH__ 1300 @@ -5310,51 +5310,59 @@ template static __global__ void #endif // __CUDA_ARCH__ >= CC_VOLTA } -#define MMVQ_NWARPS_NVIDIA 4 -#define MMVQ_NWARPS_AMD_RDNA2 1 -#define MMVQ_NWARPS_AMD_OLD 4 - -template +template #if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) -__launch_bounds__(nwarps*WARP_SIZE, 1) // tells the compiler to use as many registers as it wants +// tell the compiler to use as many registers as it wants, see nwarps definition below +__launch_bounds__((ncols_y <= 4 ? 
4 : 2)*WARP_SIZE, 1) #endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) static __global__ void mul_mat_vec_q( const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst, - const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y_par, const int nrows_dst) { + const int ncols_x, const int nrows_x, const int nrows_y, const int nrows_dst) { - const int ncols_y = ncols_y_template != 0 ? ncols_y_template : ncols_y_par; +#if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__) && (defined(RDNA2) || defined(RDNA3)) + constexpr int nwarps = 1; + constexpr int rows_per_cuda_block = 1; +#else + constexpr int nwarps = ncols_y <= 4 ? 4 : 2; + constexpr int rows_per_cuda_block = ncols_y == 1 ? 1 : 2; +#endif // defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__) && !defined(RDNA2) && !defined(RDNA3) - const int tid = WARP_SIZE*threadIdx.y + threadIdx.x; - const int row = blockIdx.x; - - const int blocks_per_row_x = ncols_x / qk; - const int blocks_per_col_y = nrows_y / QK8_1; - const int blocks_per_iter = vdr * nwarps*WARP_SIZE / qi; + const int tid = WARP_SIZE*threadIdx.y + threadIdx.x; + const int row0 = rows_per_cuda_block*blockIdx.x; + const int blocks_per_row_x = ncols_x / qk; + const int blocks_per_col_y = nrows_y / QK8_1; + constexpr int blocks_per_iter = vdr * nwarps*WARP_SIZE / qi; // partial sum for each thread - float tmp[ncols_y_template != 0 ? 
ncols_y_template : 8] = {0.0f}; + float tmp[ncols_y][rows_per_cuda_block] = {0.0f}; const block_q_t * x = (const block_q_t *) vx; const block_q8_1 * y = (const block_q8_1 *) vy; - for (int i = tid / (qi/vdr); i < blocks_per_row_x; i += blocks_per_iter) { - const int ibx = row*blocks_per_row_x + i; // x block index + for (int kbx = tid / (qi/vdr); kbx < blocks_per_row_x; kbx += blocks_per_iter) { + const int kby = kbx * (qk/QK8_1); // y block index that aligns with kbx - const int iby = i * (qk/QK8_1); // y block index that aligns with ibx - - const int iqs = vdr * (tid % (qi/vdr)); // x block quant index when casting the quants to int + // x block quant index when casting the quants to int + const int kqs = vdr * (tid % (qi/vdr)); #pragma unroll for (int j = 0; j < ncols_y; ++j) { - tmp[j] += vec_dot_q_cuda(&x[ibx], &y[j*blocks_per_col_y + iby], iqs); +#pragma unroll + for (int i = 0; i < rows_per_cuda_block; ++i) { + tmp[j][i] += vec_dot_q_cuda( + &x[kbx + (row0 + i)*blocks_per_row_x], &y[j*blocks_per_col_y + kby], kqs); + } } } - __shared__ float tmp_shared[nwarps-1 > 0 ? nwarps-1 : 1][ncols_y_template != 0 ? ncols_y_template : 8][WARP_SIZE]; + __shared__ float tmp_shared[nwarps-1 > 0 ? 
nwarps-1 : 1][ncols_y][rows_per_cuda_block][WARP_SIZE]; if (threadIdx.y > 0) { #pragma unroll for (int j = 0; j < ncols_y; ++j) { - tmp_shared[threadIdx.y-1][j][threadIdx.x] = tmp[j]; +#pragma unroll + for (int i = 0; i < rows_per_cuda_block; ++i) { + tmp_shared[threadIdx.y-1][j][i][threadIdx.x] = tmp[j][i]; + } } } __syncthreads(); @@ -5366,13 +5374,16 @@ static __global__ void mul_mat_vec_q( #pragma unroll for (int j = 0; j < ncols_y; ++j) { #pragma unroll - for (int i = 0; i < nwarps-1; ++i) { - tmp[j] += tmp_shared[i][j][threadIdx.x]; + for (int i = 0; i < rows_per_cuda_block; ++i) { +#pragma unroll + for (int l = 0; l < nwarps-1; ++l) { + tmp[j][i] += tmp_shared[l][j][i][threadIdx.x]; + } + tmp[j][i] = warp_reduce_sum(tmp[j][i]); } - tmp[j] = warp_reduce_sum(tmp[j]); - if (threadIdx.x == 0) { - dst[j*nrows_dst + row] = tmp[j]; + if (threadIdx.x < rows_per_cuda_block) { + dst[j*nrows_dst + row0 + threadIdx.x] = tmp[j][threadIdx.x]; } } } @@ -6851,65 +6862,75 @@ static void mul_mat_vec_q_cuda( const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst, cudaStream_t stream) { GGML_ASSERT(ncols_x % qk == 0); - GGML_ASSERT(ncols_y <= 4); + GGML_ASSERT(ncols_y <= MMVQ_MAX_BATCH_SIZE); int id; CUDA_CHECK(cudaGetDevice(&id)); - int nwarps; - if (g_device_caps[id].cc >= CC_OFFSET_AMD) { - nwarps = g_device_caps[id].cc >= CC_RDNA2 ? 
MMVQ_NWARPS_AMD_RDNA2 : MMVQ_NWARPS_AMD_OLD; - } else { - nwarps = MMVQ_NWARPS_NVIDIA; - } + int64_t nwarps = 1; + int64_t rows_per_cuda_block = 1; - const dim3 block_nums(nrows_x, 1, 1); + if (g_device_caps[id].cc < CC_RDNA2) { // NVIDIA and AMD older than RDNA2 + switch(ncols_y) { + case 1: + nwarps = 4; + rows_per_cuda_block = 1; + break; + case 2: + case 3: + case 4: + nwarps = 4; + rows_per_cuda_block = 2; + break; + case 5: + case 6: + case 7: + case 8: + nwarps = 2; + rows_per_cuda_block = 2; + break; + default: + GGML_ASSERT(false); + break; + } + } + const int64_t nblocks = (nrows_x + rows_per_cuda_block - 1) / rows_per_cuda_block; + const dim3 block_nums(nblocks, 1, 1); const dim3 block_dims(WARP_SIZE, nwarps, 1); - switch (nwarps) { - case 1: switch(ncols_y) { - case 1: - mul_mat_vec_q<1, 1, qk, qi, block_q_t, vdr, vec_dot> - <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); - break; - case 2: - mul_mat_vec_q<1, 2, qk, qi, block_q_t, vdr, vec_dot> - <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); - break; - case 3: - mul_mat_vec_q<1, 3, qk, qi, block_q_t, vdr, vec_dot> - <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); - break; - case 4: - mul_mat_vec_q<1, 4, qk, qi, block_q_t, vdr, vec_dot> - <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); - break; - default: - GGML_ASSERT(false); - break; - } break; - case 4: switch(ncols_y) { - case 1: - mul_mat_vec_q<4, 1, qk, qi, block_q_t, vdr, vec_dot> - <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); - break; - case 2: - mul_mat_vec_q<4, 2, qk, qi, block_q_t, vdr, vec_dot> - <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); - break; - case 3: - mul_mat_vec_q<4, 3, qk, qi, block_q_t, vdr, vec_dot> - <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); - break; - case 4: - mul_mat_vec_q<4, 4, qk, qi, block_q_t, vdr, vec_dot> - <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst); - break; 
- default: - GGML_ASSERT(false); - break; - } break; - + switch (ncols_y) { + case 1: + mul_mat_vec_q<1, qk, qi, block_q_t, vdr, vec_dot> + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, nrows_dst); + break; + case 2: + mul_mat_vec_q<2, qk, qi, block_q_t, vdr, vec_dot> + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, nrows_dst); + break; + case 3: + mul_mat_vec_q<3, qk, qi, block_q_t, vdr, vec_dot> + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, nrows_dst); + break; + case 4: + mul_mat_vec_q<4, qk, qi, block_q_t, vdr, vec_dot> + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, nrows_dst); + break; + case 5: + mul_mat_vec_q<5, qk, qi, block_q_t, vdr, vec_dot> + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, nrows_dst); + break; + case 6: + mul_mat_vec_q<6, qk, qi, block_q_t, vdr, vec_dot> + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, nrows_dst); + break; + case 7: + mul_mat_vec_q<7, qk, qi, block_q_t, vdr, vec_dot> + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, nrows_dst); + break; + case 8: + mul_mat_vec_q<8, qk, qi, block_q_t, vdr, vec_dot> + <<>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, nrows_dst); + break; default: GGML_ASSERT(false); break; @@ -9735,7 +9756,7 @@ static __global__ void k_compute_batched_ptrs( ptrs_dst[0*ne23 + i12 + i13*ne12] = ( char *) dst + i12*nbd2 + i13*nbd3; } -static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +static void ggml_cuda_mul_mat_batched_cublas(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { GGML_ASSERT(!ggml_is_transposed(src0)); GGML_ASSERT(!ggml_is_transposed(src1)); @@ -9893,39 +9914,69 @@ static void ggml_cuda_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1 int64_t min_compute_capability = INT_MAX; + bool any_pascal_with_slow_fp16 = false; if (split) { ggml_backend_cuda_split_buffer_type_context * buft_ctx = (ggml_backend_cuda_split_buffer_type_context *) src0->buffer->buft->context; auto & tensor_split = buft_ctx->tensor_split; 
for (int id = 0; id < g_device_count; ++id) { - if (min_compute_capability > g_device_caps[id].cc && tensor_split[id] < (id + 1 < g_device_count ? tensor_split[id + 1] : 1.0f)) { + // skip devices that are not going to do any work: + if (tensor_split[id] >= (id + 1 < g_device_count ? tensor_split[id + 1] : 1.0f)) { + continue; + } + + if (min_compute_capability > g_device_caps[id].cc) { min_compute_capability = g_device_caps[id].cc; } + if (g_device_caps[id].cc == 610) { + any_pascal_with_slow_fp16 = true; + } } } else { - min_compute_capability = g_device_caps[g_main_device].cc; + min_compute_capability = g_device_caps[g_main_device].cc; + any_pascal_with_slow_fp16 = g_device_caps[g_main_device].cc == 610; } + // check data types and tensor shapes for custom matrix multiplication kernels: + bool use_dequantize_mul_mat_vec = (ggml_is_quantized(src0->type) || src0->type == GGML_TYPE_F16) + && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32 + && src0->ne[0] % GGML_CUDA_DMMV_X == 0 && src1->ne[1] == 1; + + bool use_mul_mat_vec_q = ggml_is_quantized(src0->type) + && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32 + && src1->ne[1] <= MMVQ_MAX_BATCH_SIZE; + + bool use_mul_mat_q = ggml_cuda_supports_mmq(src0->type) + && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32; + #if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__) const bool fp16_performance_good = min_compute_capability >= CC_RDNA1; - bool use_mul_mat_q = ggml_is_quantized(src0->type); + #ifdef CUDA_USE_TENSOR_CORES use_mul_mat_q = use_mul_mat_q && min_compute_capability < CC_RDNA3; #endif // CUDA_USE_TENSOR_CORES #else - const bool fp16_performance_good = min_compute_capability >= CC_VOLTA; - bool use_mul_mat_q = min_compute_capability >= MIN_CC_DP4A && ggml_is_quantized(src0->type); + // fp16 performance is good on Volta or newer and on P100 (compute capability 6.0) + const bool fp16_performance_good = min_compute_capability >= CC_PASCAL && !any_pascal_with_slow_fp16; + + 
// mmvq and mmq need the __dp4a instruction which on NVIDIA is only available for CC >= 6.1 + use_mul_mat_vec_q = use_mul_mat_vec_q && min_compute_capability >= MIN_CC_DP4A; + use_mul_mat_q = use_mul_mat_q && min_compute_capability >= MIN_CC_DP4A; + #ifdef CUDA_USE_TENSOR_CORES // when tensor cores are available, use them for large batch size // ref: https://github.com/ggerganov/llama.cpp/pull/3776 - use_mul_mat_q = use_mul_mat_q && !(fp16_performance_good && src1->ne[1] > MMQ_MAX_BATCH_SIZE); + use_mul_mat_q = use_mul_mat_q && (!fp16_performance_good || src1->ne[1] <= MMQ_MAX_BATCH_SIZE); #endif // CUDA_USE_TENSOR_CORES #endif // defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__) - use_mul_mat_q = use_mul_mat_q && ggml_cuda_supports_mmq(src0->type); + // if mmvq is available it's a better choice than dmmv: +#ifndef GGML_CUDA_FORCE_DMMV + use_dequantize_mul_mat_vec = use_dequantize_mul_mat_vec && !use_mul_mat_vec_q; +#endif // GGML_CUDA_FORCE_DMMV // debug helpers //printf("src0: %8d %8d %8d %8d\n", src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3]); @@ -9943,33 +9994,15 @@ static void ggml_cuda_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1 ggml_cuda_mul_mat_vec_nc(src0, src1, dst); } else if (!split && all_on_device && fp16_performance_good && src0->type == GGML_TYPE_F16 && !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2]*src1->ne[3] > 1) { // KQ + KQV multi-batch - ggml_cuda_mul_mat_mat_batched_cublas(src0, src1, dst); - } else if (src0->type == GGML_TYPE_F32) { - ggml_cuda_op_mul_mat(src0, src1, dst, ggml_cuda_op_mul_mat_cublas, false); - } else if (ggml_is_quantized(src0->type) || src0->type == GGML_TYPE_F16) { - if (src1->ne[1] == 1 && src0->ne[0] % GGML_CUDA_DMMV_X == 0 && src1->type == GGML_TYPE_F32) { -#ifdef GGML_CUDA_FORCE_DMMV - const bool use_mul_mat_vec_q = false; -#else - const bool use_mul_mat_vec_q = min_compute_capability >= MIN_CC_DP4A && ggml_is_quantized(src0->type); -#endif // GGML_CUDA_FORCE_DMMV - - 
if (use_mul_mat_vec_q) { - ggml_cuda_op_mul_mat(src0, src1, dst, ggml_cuda_op_mul_mat_vec_q, true); - } else { - ggml_cuda_op_mul_mat(src0, src1, dst, ggml_cuda_op_dequantize_mul_mat_vec, false); - } - } else { - if (src1->ne[1] <= 4 && min_compute_capability >= MIN_CC_DP4A && ggml_is_quantized(src0->type) && src1->type == GGML_TYPE_F32) { - ggml_cuda_op_mul_mat(src0, src1, dst, ggml_cuda_op_mul_mat_vec_q, true); - } else if (use_mul_mat_q) { - ggml_cuda_op_mul_mat(src0, src1, dst, ggml_cuda_op_mul_mat_q, true); - } else { - ggml_cuda_op_mul_mat(src0, src1, dst, ggml_cuda_op_mul_mat_cublas, false); - } - } + ggml_cuda_mul_mat_batched_cublas(src0, src1, dst); + } else if (use_dequantize_mul_mat_vec) { + ggml_cuda_op_mul_mat(src0, src1, dst, ggml_cuda_op_dequantize_mul_mat_vec, false); + } else if (use_mul_mat_vec_q) { + ggml_cuda_op_mul_mat(src0, src1, dst, ggml_cuda_op_mul_mat_vec_q, true); + } else if (use_mul_mat_q) { + ggml_cuda_op_mul_mat(src0, src1, dst, ggml_cuda_op_mul_mat_q, true); } else { - GGML_ASSERT(false); + ggml_cuda_op_mul_mat(src0, src1, dst, ggml_cuda_op_mul_mat_cublas, false); } } From 3b169441dfe8e420f88d1592708cc2a871daadb9 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 12 Feb 2024 09:16:06 +0200 Subject: [PATCH 89/94] sync : ggml (#5452) * ggml-alloc : v3 (ggml/727) * ggml-alloc v3 ggml-ci * fix ci ggml-ci * whisper : check for backend buffer allocation failures * whisper : avoid leaks when initialization fails * cleanup ggml-ci * style fixes ggml-ci * sync : ggml * update llama.cpp, clip.cpp, export-lora.cpp * update finetune.cpp, train-text-from-scratch.cpp ggml-ci * ggml-backend : reduce alignment to 32 to match gguf and fix mmap --------- Co-authored-by: slaren --- examples/export-lora/export-lora.cpp | 19 +- examples/finetune/finetune.cpp | 147 +- examples/llava/clip.cpp | 152 +- .../train-text-from-scratch.cpp | 112 +- ggml-alloc.c | 1373 +++++++++-------- ggml-alloc.h | 110 +- ggml-backend.c | 492 +++--- ggml-backend.h | 15 
+- ggml.c | 28 +- ggml.h | 18 +- llama.cpp | 181 +-- scripts/sync-ggml.last | 2 +- 12 files changed, 1287 insertions(+), 1362 deletions(-) diff --git a/examples/export-lora/export-lora.cpp b/examples/export-lora/export-lora.cpp index 4cd5d99bb..2f7be8a13 100644 --- a/examples/export-lora/export-lora.cpp +++ b/examples/export-lora/export-lora.cpp @@ -337,24 +337,14 @@ static bool apply_lora(struct ggml_tensor * tensor, struct lora_data * lora, int params.mem_buffer = NULL; params.no_alloc = true; struct ggml_context * ctx = NULL; - struct ggml_allocr * alloc = NULL; - struct ggml_cgraph * gf = NULL; + struct ggml_gallocr * alloc = NULL; + struct ggml_cgraph * gf = NULL; ctx = ggml_init(params); - alloc = ggml_allocr_new_measure(tensor_alignment); + alloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type()); gf = build_graph_lora(ctx, tensor, lora_a, lora_b, scaling); - size_t alloc_size = ggml_allocr_alloc_graph(alloc, gf); - ggml_allocr_free(alloc); - ggml_free(ctx); - static std::vector data_compute; - data_compute.resize(alloc_size + tensor_alignment); - - ctx = ggml_init(params); - alloc = ggml_allocr_new(data_compute.data(), data_compute.size(), tensor_alignment); - gf = build_graph_lora(ctx, tensor, lora_a, lora_b, scaling); - ggml_allocr_alloc_graph(alloc, gf); - ggml_allocr_free(alloc); + ggml_gallocr_alloc_graph(alloc, gf); struct ggml_cplan cplan = ggml_graph_plan(gf, n_threads); static std::vector data_work; @@ -363,6 +353,7 @@ static bool apply_lora(struct ggml_tensor * tensor, struct lora_data * lora, int ggml_graph_compute(gf, &cplan); + ggml_gallocr_free(alloc); ggml_free(ctx); return true; } diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp index b7e19c5fe..b11c56020 100644 --- a/examples/finetune/finetune.cpp +++ b/examples/finetune/finetune.cpp @@ -1,5 +1,6 @@ #include "ggml.h" #include "ggml-alloc.h" +#include "ggml-backend.h" #include "llama.h" #include "common.h" #include "train.h" @@ -13,8 +14,6 @@ #pragma 
warning(disable: 4244 4267) // possible loss of data #endif -static const size_t tensor_alignment = 32; - struct my_llama_hparams { uint32_t n_vocab = 32000; uint32_t n_ctx = 512; @@ -128,7 +127,7 @@ struct my_llama_lora_layer { struct my_llama_lora { struct ggml_context * ctx = NULL; - std::vector data; + ggml_backend_buffer_t data; my_llama_lora_hparams hparams; @@ -372,63 +371,6 @@ static void set_param_lora(struct my_llama_lora * lora) { } } -static void alloc_lora(struct ggml_allocr * alloc, struct my_llama_lora * lora) { - ggml_allocr_alloc(alloc, lora->tok_embeddings_a); - ggml_allocr_alloc(alloc, lora->tok_embeddings_b); - ggml_allocr_alloc(alloc, lora->norm_a); - ggml_allocr_alloc(alloc, lora->norm_b); - ggml_allocr_alloc(alloc, lora->output_a); - ggml_allocr_alloc(alloc, lora->output_b); - for (uint32_t i = 0; i < lora->layers.size(); ++i) { - auto & layer = lora->layers[i]; - ggml_allocr_alloc(alloc, layer.attention_norm_a); - ggml_allocr_alloc(alloc, layer.attention_norm_b); - ggml_allocr_alloc(alloc, layer.wq_a); - ggml_allocr_alloc(alloc, layer.wq_b); - ggml_allocr_alloc(alloc, layer.wk_a); - ggml_allocr_alloc(alloc, layer.wk_b); - ggml_allocr_alloc(alloc, layer.wv_a); - ggml_allocr_alloc(alloc, layer.wv_b); - ggml_allocr_alloc(alloc, layer.wo_a); - ggml_allocr_alloc(alloc, layer.wo_b); - ggml_allocr_alloc(alloc, layer.ffn_norm_a); - ggml_allocr_alloc(alloc, layer.ffn_norm_b); - ggml_allocr_alloc(alloc, layer.w1_a); - ggml_allocr_alloc(alloc, layer.w1_b); - ggml_allocr_alloc(alloc, layer.w2_a); - ggml_allocr_alloc(alloc, layer.w2_b); - ggml_allocr_alloc(alloc, layer.w3_a); - ggml_allocr_alloc(alloc, layer.w3_b); - } - ggml_allocr_alloc(alloc, lora->tok_embeddings_a->grad); - ggml_allocr_alloc(alloc, lora->tok_embeddings_b->grad); - ggml_allocr_alloc(alloc, lora->norm_a->grad); - ggml_allocr_alloc(alloc, lora->norm_b->grad); - ggml_allocr_alloc(alloc, lora->output_a->grad); - ggml_allocr_alloc(alloc, lora->output_b->grad); - for (uint32_t i = 0; i < 
lora->layers.size(); ++i) { - auto & layer = lora->layers[i]; - ggml_allocr_alloc(alloc, layer.attention_norm_a->grad); - ggml_allocr_alloc(alloc, layer.attention_norm_b->grad); - ggml_allocr_alloc(alloc, layer.wq_a->grad); - ggml_allocr_alloc(alloc, layer.wq_b->grad); - ggml_allocr_alloc(alloc, layer.wk_a->grad); - ggml_allocr_alloc(alloc, layer.wk_b->grad); - ggml_allocr_alloc(alloc, layer.wv_a->grad); - ggml_allocr_alloc(alloc, layer.wv_b->grad); - ggml_allocr_alloc(alloc, layer.wo_a->grad); - ggml_allocr_alloc(alloc, layer.wo_b->grad); - ggml_allocr_alloc(alloc, layer.ffn_norm_a->grad); - ggml_allocr_alloc(alloc, layer.ffn_norm_b->grad); - ggml_allocr_alloc(alloc, layer.w1_a->grad); - ggml_allocr_alloc(alloc, layer.w1_b->grad); - ggml_allocr_alloc(alloc, layer.w2_a->grad); - ggml_allocr_alloc(alloc, layer.w2_b->grad); - ggml_allocr_alloc(alloc, layer.w3_a->grad); - ggml_allocr_alloc(alloc, layer.w3_b->grad); - } -} - static void init_lora(const struct my_llama_model * model, struct my_llama_lora * lora) { const auto & lparams = lora->hparams; @@ -522,18 +464,8 @@ static void init_lora(const struct my_llama_model * model, struct my_llama_lora set_param_lora(lora); - // measure data size - size_t size = 0; - for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) { - size += GGML_PAD(ggml_nbytes(t), tensor_alignment); - } - - // allocate data - struct ggml_allocr * alloc = NULL; - lora->data.resize(size + tensor_alignment); - alloc = ggml_allocr_new(lora->data.data(), lora->data.size(), tensor_alignment); - alloc_lora(alloc, lora); - ggml_allocr_free(alloc); + // allocate data for lora tensors + lora->data = ggml_backend_alloc_ctx_tensors_from_buft(ctx, ggml_backend_cpu_buffer_type()); } static void randomize_lora(struct my_llama_lora * lora, int seed, float mean, float std, float min, float max) { @@ -579,7 +511,7 @@ static void randomize_lora(struct my_llama_lora * lora, int seed, float mean, fl static struct 
ggml_tensor * llama_build_lora_finetune_graphs( struct my_llama_model * model, struct my_llama_lora * lora, - struct ggml_allocr * alloc, + ggml_gallocr_t alloc, struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, @@ -590,7 +522,8 @@ static struct ggml_tensor * llama_build_lora_finetune_graphs( const int n_tokens, const int n_batch, const bool enable_flash_attn, - const bool enable_checkpointing) { + const bool enable_checkpointing, + const bool measure_only) { ggml_set_scratch(ctx, { 0, 0, nullptr, }); const int n_past = 0; @@ -622,13 +555,7 @@ static struct ggml_tensor * llama_build_lora_finetune_graphs( // KQ_pos - contains the positions struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, N); - ggml_allocr_alloc(alloc, KQ_pos); - if (!ggml_allocr_is_measure(alloc)) { - int * data = (int *) KQ_pos->data; - for (int i = 0; i < N; ++i) { - data[i] = n_past + i; - } - } + ggml_set_input(KQ_pos); // rope has so much parameters that we make a custom function for it auto rope = [ctx, KQ_pos, n_rot, n_ctx, rope_freq_base, rope_freq_scale] @@ -780,7 +707,7 @@ static struct ggml_tensor * llama_build_lora_finetune_graphs( // input gradient ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, t36->grad, 1.0f)); GGML_ASSERT(t36->grad->data == NULL && t36->grad->view_src == NULL); - ggml_allocr_alloc(alloc, t36->grad); + ggml_set_input(t36->grad); // KQ_pos ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, KQ_pos, 1.0f)); @@ -805,11 +732,23 @@ static struct ggml_tensor * llama_build_lora_finetune_graphs( // note: they will be freed in reverse order for (unsigned int i = 0; i < checkpoints.size(); ++i) { if (checkpoints[i]->data == NULL && checkpoints[i]->view_src == NULL) { - ggml_allocr_alloc(alloc, checkpoints[i]); + ggml_set_input(checkpoints[i]); } } - ggml_allocr_alloc_graph(alloc, gb); + if (measure_only) { + ggml_gallocr_reserve(alloc, gb); + } else { + ggml_gallocr_alloc_graph(alloc, gb); + + // set KQ_pos + { + int * 
data = (int *) KQ_pos->data; + for (int i = 0; i < N; ++i) { + data[i] = n_past + i; + } + } + } // remove the additional nodes and leafs for (int i = n_leafs_before; i < gb->n_leafs; ++i) { @@ -1663,7 +1602,7 @@ int main(int argc, char ** argv) { printf("%s: seen train_samples %llu\n", __func__, (long long unsigned) train->train_samples); printf("%s: seen train_tokens %llu\n", __func__, (long long unsigned) train->train_tokens); printf("%s: completed train_epochs %llu\n", __func__, (long long unsigned) train->train_epochs); - printf("%s: lora_size = %zu bytes (%.1f MB)\n", __func__, (ggml_used_mem(lora.ctx) + lora.data.size()), (float) (ggml_used_mem(lora.ctx) + lora.data.size()) / (1024.0f*1024.0f)); + printf("%s: lora_size = %zu bytes (%.1f MB)\n", __func__, (ggml_used_mem(lora.ctx) + ggml_backend_buffer_get_size(lora.data)), (float) (ggml_used_mem(lora.ctx) + ggml_backend_buffer_get_size(lora.data)) / (1024.0f*1024.0f)); if (params.only_write_lora) { save_train_files_data save_data; @@ -1690,10 +1629,6 @@ int main(int argc, char ** argv) { int n_vocab = model.hparams.n_vocab; int n_batch = params.common.n_batch; - - std::vector mem_input_data; - std::vector mem_compute_data; - // context for input tensors without their data struct ggml_init_params ctx_input_params = { ggml_tensor_overhead() * 2, // mem_size @@ -1706,17 +1641,11 @@ int main(int argc, char ** argv) { struct ggml_tensor * tokens_input = ggml_new_tensor_2d(ctx_input, GGML_TYPE_I32, n_tokens, n_batch); struct ggml_tensor * target_probs = ggml_new_tensor_3d(ctx_input, GGML_TYPE_F32, n_vocab, n_tokens, n_batch); - // measure required memory for input tensors - size_t max_input_size = GGML_PAD(ggml_nbytes(tokens_input), tensor_alignment) + - GGML_PAD(ggml_nbytes(target_probs), tensor_alignment) + - tensor_alignment; - printf("%s: input_size = %zu bytes (%.1f MB)\n", __func__, max_input_size, (float) max_input_size / (1024.0f*1024.0f)); - // allocate input tensors - 
mem_input_data.resize(max_input_size); - ggml_allocr_t alloc_inps = ggml_allocr_new(mem_input_data.data(), mem_input_data.size(), tensor_alignment); - ggml_allocr_alloc(alloc_inps, tokens_input); - ggml_allocr_alloc(alloc_inps, target_probs); + // measure required memory for input tensors + ggml_backend_buffer_t input_data = ggml_backend_alloc_ctx_tensors_from_buft(ctx_input, ggml_backend_cpu_buffer_type()); + size_t max_input_size = ggml_backend_buffer_get_size(input_data); + printf("%s: input_size = %zu bytes (%.1f MB)\n", __func__, max_input_size, (float) max_input_size / (1024.0f*1024.0f)); // context for compute tensors without their data const size_t estimated_compute_size_wo_data = ( @@ -1743,7 +1672,7 @@ int main(int argc, char ** argv) { // find best evaluation order for (unsigned order = 0; order < (unsigned) GGML_CGRAPH_EVAL_ORDER_COUNT; ++order) { ctx_compute = ggml_init(ctx_compute_params); - ggml_allocr_t alloc = ggml_allocr_new_measure(tensor_alignment); + ggml_gallocr_t alloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type()); gf = ggml_new_graph_custom(ctx_compute, LLAMA_TRAIN_MAX_NODES, true); gf->order = (enum ggml_cgraph_eval_order) order; gb = ggml_new_graph_custom(ctx_compute, LLAMA_TRAIN_MAX_NODES, true); @@ -1756,14 +1685,15 @@ int main(int argc, char ** argv) { &logits, tokens_input, target_probs, n_tokens, n_batch, params.common.use_flash, - params.common.use_checkpointing + params.common.use_checkpointing, + true ); - size_t max_compute_size = ggml_allocr_max_size(alloc) + tensor_alignment; + size_t max_compute_size = ggml_gallocr_get_buffer_size(alloc, 0); // FIXME: this will still allocate the buffer if (max_compute_size < best_compute_size) { best_compute_size = max_compute_size; best_order = gf->order; } - ggml_allocr_free(alloc); + ggml_gallocr_free(alloc); ggml_free(ctx_compute); } size_t max_compute_size = best_compute_size; @@ -1774,9 +1704,8 @@ int main(int argc, char ** argv) { "invalid"); // allocate compute tensors - 
mem_compute_data.resize(max_compute_size); ctx_compute = ggml_init(ctx_compute_params); - ggml_allocr_t alloc = ggml_allocr_new(mem_compute_data.data(), mem_compute_data.size(), tensor_alignment); + ggml_gallocr_t alloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type()); gf = ggml_new_graph_custom(ctx_compute, LLAMA_TRAIN_MAX_NODES, true); gf->order = best_order; gb = ggml_new_graph_custom(ctx_compute, LLAMA_TRAIN_MAX_NODES, true); @@ -1789,11 +1718,9 @@ int main(int argc, char ** argv) { &logits, tokens_input, target_probs, n_tokens, n_batch, params.common.use_flash, - params.common.use_checkpointing + params.common.use_checkpointing, + false ); - ggml_allocr_free(alloc); - ggml_allocr_free(alloc_inps); - // tokenize data std::vector train_tokens; @@ -1908,6 +1835,8 @@ int main(int argc, char ** argv) { ggml_free(ctx_work); ggml_free(ctx_compute); ggml_free(ctx_input); + ggml_gallocr_free(alloc); + int64_t t1 = ggml_time_ms(); printf("%s: total training time: ", __func__); diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp index 9129052a2..ccd0d85ad 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp @@ -367,7 +367,7 @@ struct clip_ctx { ggml_backend_buffer_t params_buffer = NULL; ggml_backend_buffer_t compute_buffer = NULL; ggml_backend_t backend = NULL; - ggml_allocr * compute_alloc = NULL; + ggml_gallocr_t compute_alloc = NULL; }; static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32_batch * imgs) { @@ -405,31 +405,8 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 struct ggml_cgraph * gf = ggml_new_graph(ctx0); struct ggml_tensor * inp_raw = ggml_new_tensor_4d(ctx0, GGML_TYPE_F32, image_size, image_size, 3, batch_size); - ggml_allocr_alloc(ctx->compute_alloc, inp_raw); - - if (!ggml_allocr_is_measure(ctx->compute_alloc)) { - float * data = (float *)malloc(ggml_nbytes(inp_raw)); - - for (size_t i = 0; i < imgs->size; i++) { - const int nx = imgs->data[i].nx; - const int ny 
= imgs->data[i].ny; - GGML_ASSERT(nx == image_size && ny == image_size); - - const int n = nx * ny; - - for (int b = 0; b < batch_size; b++) { - for (int k = 0; k < 3; k++) { - for (int y = 0; y < ny; y++) { - for (int x = 0; x < nx; x++) { - data[(b * 3 * n) + k * n + y * nx + x] = imgs->data[b].buf[3 * (y * nx + x) + k]; - } - } - } - } - } - ggml_backend_tensor_set(inp_raw, data, 0, ggml_nbytes(inp_raw)); - free(data); - } + ggml_set_name(inp_raw, "inp_raw"); + ggml_set_input(inp_raw); struct ggml_tensor * inp = ggml_conv_2d(ctx0, model.patch_embeddings, inp_raw, patch_size, patch_size, 0, 0, 1, 1); @@ -438,13 +415,8 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 // concat class_embeddings and patch_embeddings struct ggml_tensor * embeddings = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, hidden_size, num_positions, batch_size); - ggml_allocr_alloc(ctx->compute_alloc, embeddings); - if (!ggml_allocr_is_measure(ctx->compute_alloc)) { - void* zero_mem = malloc(ggml_nbytes(embeddings)); - memset(zero_mem, 0, ggml_nbytes(embeddings)); - ggml_backend_tensor_set(embeddings, zero_mem, 0, ggml_nbytes(embeddings)); - free(zero_mem); - } + ggml_set_name(embeddings, "embeddings"); + ggml_set_input(embeddings); embeddings = ggml_acc(ctx0, embeddings, model.class_embedding, embeddings->nb[1], embeddings->nb[2], embeddings->nb[3], 0); @@ -453,15 +425,8 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 embeddings->nb[1], embeddings->nb[2], embeddings->nb[3], model.class_embedding->nb[1]); struct ggml_tensor * positions = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, num_positions); - ggml_allocr_alloc(ctx->compute_alloc, positions); - if (!ggml_allocr_is_measure(ctx->compute_alloc)) { - int* positions_data = (int*)malloc(ggml_nbytes(positions)); - for (int i = 0; i < num_positions; i++) { - positions_data[i] = i; - } - ggml_backend_tensor_set(positions, positions_data, 0, ggml_nbytes(positions)); - 
free(positions_data); - } + ggml_set_name(positions, "positions"); + ggml_set_input(positions); embeddings = ggml_add(ctx0, embeddings, ggml_get_rows(ctx0, model.position_embeddings, positions)); @@ -560,15 +525,8 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 embeddings = ggml_reshape_2d(ctx0, embeddings, embeddings->ne[0], embeddings->ne[1]); struct ggml_tensor * patches = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, num_patches); - ggml_allocr_alloc(ctx->compute_alloc, patches); - if (!ggml_allocr_is_measure(ctx->compute_alloc)) { - int* patches_data = (int*)malloc(ggml_nbytes(patches)); - for (int i = 0; i < num_patches; i++) { - patches_data[i] = i + 1; - } - ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches)); - free(patches_data); - } + ggml_set_name(patches, "patches"); + ggml_set_input(patches); // shape [1, 576, 1024] // ne is whcn, ne = [1024, 576, 1, 1] @@ -809,7 +767,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { } // data - size_t buffer_size = 0; + size_t model_size = 0; { for (int i = 0; i < n_tensors; ++i) { const char * name = gguf_get_tensor_name(ctx, i); @@ -817,7 +775,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { enum ggml_type type = gguf_get_tensor_type(ctx, i); struct ggml_tensor * cur = ggml_get_tensor(meta, name); size_t tensor_size = ggml_nbytes(cur); - buffer_size += tensor_size; + model_size += tensor_size; if (verbosity >= 3) { printf("%s: tensor[%d]: n_dims = %d, name = %s, tensor_size=%zu, offset=%zu, shape:[%" PRIu64 ", %" PRIu64 ", %" PRIu64 ", %" PRIu64 "], type = %s\n", __func__, i, ggml_n_dims(cur), cur->name, tensor_size, offset, cur->ne[0], cur->ne[1], cur->ne[2], cur->ne[3], ggml_type_name(type)); @@ -825,8 +783,6 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { } } - buffer_size += n_tensors * 128 /* CLIP PADDING */; - clip_ctx * new_clip = new clip_ctx; // 
update projector type @@ -886,12 +842,12 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { printf("%s: text_encoder: %d\n", __func__, new_clip->has_text_encoder); printf("%s: vision_encoder: %d\n", __func__, new_clip->has_vision_encoder); printf("%s: llava_projector: %d\n", __func__, new_clip->has_llava_projector); - printf("%s: model size: %.2f MB\n", __func__, buffer_size / 1024.0 / 1024.0); + printf("%s: model size: %.2f MB\n", __func__, model_size / 1024.0 / 1024.0); printf("%s: metadata size: %.2f MB\n", __func__, ggml_get_mem_size(meta) / 1024.0 / 1024.0); } } - printf("%s: params backend buffer size = % 6.2f MB (%i tensors)\n", __func__, buffer_size / (1024.0 * 1024.0), n_tensors); + printf("%s: params backend buffer size = % 6.2f MB (%i tensors)\n", __func__, model_size / (1024.0 * 1024.0), n_tensors); // load tensors { @@ -925,12 +881,10 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { } // alloc memory and offload data - new_clip->params_buffer = ggml_backend_alloc_buffer(new_clip->backend, buffer_size); - ggml_allocr* alloc = ggml_allocr_new_from_buffer(new_clip->params_buffer); + new_clip->params_buffer = ggml_backend_alloc_ctx_tensors(new_clip->ctx_data, new_clip->backend); for (int i = 0; i < n_tensors; ++i) { const char * name = gguf_get_tensor_name(ctx, i); struct ggml_tensor * cur = ggml_get_tensor(new_clip->ctx_data, name); - ggml_allocr_alloc(alloc, cur); const size_t offset = gguf_get_data_offset(ctx) + gguf_get_tensor_offset(ctx, i); fin.seekg(offset, std::ios::beg); if (!fin) { @@ -949,7 +903,6 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { ggml_backend_tensor_set(cur, read_buf.data(), 0, num_bytes); } } - ggml_allocr_free(alloc); fin.close(); } @@ -1077,15 +1030,12 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { // measure mem requirement and allocate { 
new_clip->buf_compute_meta.resize(GGML_DEFAULT_GRAPH_SIZE * ggml_tensor_overhead() + ggml_graph_overhead()); - new_clip->compute_alloc = ggml_allocr_new_measure_from_backend(new_clip->backend); + new_clip->compute_alloc = ggml_gallocr_new(ggml_backend_get_default_buffer_type(new_clip->backend)); clip_image_f32_batch batch; batch.size = 1; ggml_cgraph * gf = clip_image_build_graph(new_clip, &batch); - size_t compute_memory_buffer_size = ggml_allocr_alloc_graph(new_clip->compute_alloc, gf); - ggml_allocr_free(new_clip->compute_alloc); - new_clip->compute_buffer = ggml_backend_alloc_buffer(new_clip->backend, compute_memory_buffer_size); - new_clip->compute_alloc = ggml_allocr_new_from_buffer(new_clip->compute_buffer); - + ggml_gallocr_reserve(new_clip->compute_alloc, gf); + size_t compute_memory_buffer_size = ggml_gallocr_get_buffer_size(new_clip->compute_alloc, 0); printf("%s: compute allocated memory: %.2f MB\n", __func__, compute_memory_buffer_size /1024.0/1024.0); } @@ -1267,12 +1217,72 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima GGML_ASSERT(batch_size == 1); // TODO: support multiple images } - // reset alloc buffer to clean the memory from previous invocations - ggml_allocr_reset(ctx->compute_alloc); - // build the inference graph ggml_cgraph * gf = clip_image_build_graph(ctx, imgs); - ggml_allocr_alloc_graph(ctx->compute_alloc, gf); + ggml_gallocr_alloc_graph(ctx->compute_alloc, gf); + + // set inputs + const auto & model = ctx->vision_model; + const auto & hparams = model.hparams; + const int image_size = hparams.image_size; + const int patch_size = hparams.patch_size; + const int num_patches = ((image_size / patch_size) * (image_size / patch_size)); + const int num_positions = num_patches + 1; + + { + struct ggml_tensor * inp_raw = ggml_graph_get_tensor(gf, "inp_raw"); + float * data = (float *)malloc(ggml_nbytes(inp_raw)); + + for (size_t i = 0; i < imgs->size; i++) { + const int nx = imgs->data[i].nx; + const int ny = 
imgs->data[i].ny; + GGML_ASSERT(nx == image_size && ny == image_size); + + const int n = nx * ny; + + for (int b = 0; b < batch_size; b++) { + for (int k = 0; k < 3; k++) { + for (int y = 0; y < ny; y++) { + for (int x = 0; x < nx; x++) { + data[(b * 3 * n) + k * n + y * nx + x] = imgs->data[b].buf[3 * (y * nx + x) + k]; + } + } + } + } + } + ggml_backend_tensor_set(inp_raw, data, 0, ggml_nbytes(inp_raw)); + free(data); + } + + { + struct ggml_tensor * embeddings = ggml_graph_get_tensor(gf, "embeddings"); + + void* zero_mem = malloc(ggml_nbytes(embeddings)); + memset(zero_mem, 0, ggml_nbytes(embeddings)); + ggml_backend_tensor_set(embeddings, zero_mem, 0, ggml_nbytes(embeddings)); + free(zero_mem); + } + + { + struct ggml_tensor * positions = ggml_graph_get_tensor(gf, "positions"); + + int* positions_data = (int*)malloc(ggml_nbytes(positions)); + for (int i = 0; i < num_positions; i++) { + positions_data[i] = i; + } + ggml_backend_tensor_set(positions, positions_data, 0, ggml_nbytes(positions)); + free(positions_data); + } + + { + struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches"); + int* patches_data = (int*)malloc(ggml_nbytes(patches)); + for (int i = 0; i < num_patches; i++) { + patches_data[i] = i + 1; + } + ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches)); + free(patches_data); + } if (ggml_backend_is_cpu(ctx->backend)) { ggml_backend_cpu_set_n_threads(ctx->backend, n_threads); diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp index eee9d4de3..2e2a8ce08 100644 --- a/examples/train-text-from-scratch/train-text-from-scratch.cpp +++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp @@ -1,5 +1,6 @@ #include "ggml.h" #include "ggml-alloc.h" +#include "ggml-backend.h" #include "common.h" #include "train.h" #include "llama.h" @@ -19,8 +20,6 @@ #pragma warning(disable: 4244 4267) // possible loss of data #endif -static const 
size_t tensor_alignment = 32; - struct my_llama_hparams { uint32_t n_vocab = 32000; uint32_t n_ctx = 512; @@ -58,7 +57,7 @@ struct my_llama_layer { struct my_llama_model { struct ggml_context * ctx = NULL; - std::vector data; + ggml_backend_buffer_t data = NULL; my_llama_hparams hparams; @@ -147,39 +146,6 @@ static void set_param_model(struct my_llama_model * model) { } } -static void alloc_model(struct ggml_allocr * alloc, struct my_llama_model * model) { - ggml_allocr_alloc(alloc, model->tok_embeddings); - ggml_allocr_alloc(alloc, model->norm); - ggml_allocr_alloc(alloc, model->output); - for (uint32_t i = 0; i < model->layers.size(); ++i) { - auto & layer = model->layers[i]; - ggml_allocr_alloc(alloc, layer.attention_norm); - ggml_allocr_alloc(alloc, layer.wq); - ggml_allocr_alloc(alloc, layer.wk); - ggml_allocr_alloc(alloc, layer.wv); - ggml_allocr_alloc(alloc, layer.wo); - ggml_allocr_alloc(alloc, layer.ffn_norm); - ggml_allocr_alloc(alloc, layer.w1); - ggml_allocr_alloc(alloc, layer.w2); - ggml_allocr_alloc(alloc, layer.w3); - } - ggml_allocr_alloc(alloc, model->tok_embeddings->grad); - ggml_allocr_alloc(alloc, model->norm->grad); - ggml_allocr_alloc(alloc, model->output->grad); - for (uint32_t i = 0; i < model->layers.size(); ++i) { - auto & layer = model->layers[i]; - ggml_allocr_alloc(alloc, layer.attention_norm->grad); - ggml_allocr_alloc(alloc, layer.wq->grad); - ggml_allocr_alloc(alloc, layer.wk->grad); - ggml_allocr_alloc(alloc, layer.wv->grad); - ggml_allocr_alloc(alloc, layer.wo->grad); - ggml_allocr_alloc(alloc, layer.ffn_norm->grad); - ggml_allocr_alloc(alloc, layer.w1->grad); - ggml_allocr_alloc(alloc, layer.w2->grad); - ggml_allocr_alloc(alloc, layer.w3->grad); - } -} - static void init_model(struct my_llama_model * model) { const auto & hparams = model->hparams; @@ -252,17 +218,8 @@ static void init_model(struct my_llama_model * model) { set_param_model(model); - // measure data size - size_t size = 0; - for (struct ggml_tensor * t = 
ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) { - size += GGML_PAD(ggml_nbytes(t), tensor_alignment); - } - // allocate data - struct ggml_allocr * alloc = NULL; - model->data.resize(size + tensor_alignment); - alloc = ggml_allocr_new(model->data.data(), model->data.size(), tensor_alignment); - alloc_model(alloc, model); + model->data = ggml_backend_alloc_ctx_tensors_from_buft(ctx, ggml_backend_cpu_buffer_type()); } static void randomize_model(struct my_llama_model * model, int seed, float mean, float std, float min, float max) { @@ -297,7 +254,7 @@ static void randomize_model(struct my_llama_model * model, int seed, float mean, static struct ggml_tensor * llama_build_train_graphs( struct my_llama_model * model, - struct ggml_allocr * alloc, + ggml_gallocr_t alloc, struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, @@ -308,7 +265,8 @@ static struct ggml_tensor * llama_build_train_graphs( const int n_tokens, const int n_batch, const bool enable_flash_attn, - const bool enable_checkpointing) { + const bool enable_checkpointing, + const bool measure_only) { ggml_set_scratch(ctx, { 0, 0, nullptr, }); const int n_past = 0; @@ -334,13 +292,7 @@ static struct ggml_tensor * llama_build_train_graphs( // KQ_pos - contains the positions struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, N); - ggml_allocr_alloc(alloc, KQ_pos); - if (!ggml_allocr_is_measure(alloc)) { - int * data = (int *) KQ_pos->data; - for (int i = 0; i < N; ++i) { - data[i] = n_past + i; - } - } + ggml_set_input(KQ_pos); // rope has so much parameters that we make a custom function for it auto rope = [ctx, KQ_pos, n_rot, n_ctx, rope_freq_base, rope_freq_scale] @@ -448,21 +400,31 @@ static struct ggml_tensor * llama_build_train_graphs( // KQ_pos ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, KQ_pos, 1.0f)); GGML_ASSERT(t36->grad->data == NULL && t36->grad->view_src == NULL); - - ggml_allocr_alloc(alloc, t36->grad); + 
ggml_set_input(t36->grad); // allocating checkpoints in one block to reduce memory fragmentation // note: they will be freed in reverse order for (int i = 0; i < (int) checkpoints.size(); ++i) { if (checkpoints[i]->data == NULL && checkpoints[i]->view_src == NULL) { - ggml_allocr_alloc(alloc, checkpoints[i]); + ggml_set_input(checkpoints[i]); } } //int n_leafs_after = gb->n_leafs; //int n_nodes_after = gb->n_nodes; + if (measure_only) { + // FIXME: will still allocate + ggml_gallocr_reserve(alloc, gb); + } else { + ggml_gallocr_alloc_graph(alloc, gb); - ggml_allocr_alloc_graph(alloc, gb); + if (!measure_only) { + int * data = (int *) KQ_pos->data; + for (int i = 0; i < N; ++i) { + data[i] = n_past + i; + } + } + } // remove the additional nodes and leafs for (int i = n_leafs_before; i < gb->n_leafs; ++i) { @@ -1046,7 +1008,7 @@ int main(int argc, char ** argv) { printf("%s: seen train_samples %llu\n", __func__, (long long unsigned) train->train_samples); printf("%s: seen train_tokens %llu\n", __func__, (long long unsigned) train->train_tokens); printf("%s: completed train_epochs %llu\n", __func__, (long long unsigned) train->train_epochs); - printf("%s: model_size = %zu bytes (%.1f MB)\n", __func__, (ggml_used_mem(model.ctx) + model.data.size()), (float) (ggml_used_mem(model.ctx) + model.data.size()) / (1024.0f*1024.0f)); + printf("%s: model_size = %zu bytes (%.1f MB)\n", __func__, (ggml_used_mem(model.ctx) + ggml_backend_buffer_get_size(model.data)), (float) (ggml_used_mem(model.ctx) + ggml_backend_buffer_get_size(model.data)) / (1024.0f*1024.0f)); if (params.only_write_model) { save_train_files_data save_data; @@ -1073,11 +1035,6 @@ int main(int argc, char ** argv) { int n_vocab = model.hparams.n_vocab; int n_batch = params.common.n_batch; - std::vector mem_input_data; - std::vector mem_compute_data; - - ggml_allocr * alloc = NULL; - // context for input tensors without their data struct ggml_init_params ctx_input_params = { ggml_tensor_overhead() * 2, // 
mem_size @@ -1091,16 +1048,10 @@ int main(int argc, char ** argv) { struct ggml_tensor * target_probs = ggml_new_tensor_3d(ctx_input, GGML_TYPE_F32, n_vocab, n_tokens, n_batch); // measure required memory for input tensors - size_t max_input_size = GGML_PAD(ggml_nbytes(tokens_input), tensor_alignment) + - GGML_PAD(ggml_nbytes(target_probs), tensor_alignment) + - tensor_alignment; - printf("%s: input_size = %zu bytes (%.1f MB)\n", __func__, max_input_size, (float) max_input_size / (1024.0f*1024.0f)); - // allocate input tensors - mem_input_data.resize(max_input_size); - alloc = ggml_allocr_new(mem_input_data.data(), mem_input_data.size(), tensor_alignment); - ggml_allocr_alloc(alloc, tokens_input); - ggml_allocr_alloc(alloc, target_probs); + ggml_backend_buffer_t input_data = ggml_backend_alloc_ctx_tensors_from_buft(ctx_input, ggml_backend_cpu_buffer_type()); + size_t max_input_size = ggml_backend_buffer_get_size(input_data); + printf("%s: input_size = %zu bytes (%.1f MB)\n", __func__, max_input_size, (float) max_input_size / (1024.0f*1024.0f)); // context for compute tensors without their data const size_t estimated_compute_size_wo_data = ( @@ -1127,7 +1078,7 @@ int main(int argc, char ** argv) { // find best evaluation order for (unsigned order = 0; order < (unsigned) GGML_CGRAPH_EVAL_ORDER_COUNT; ++order) { ctx_compute = ggml_init(ctx_compute_params); - alloc = ggml_allocr_new_measure(tensor_alignment); + ggml_gallocr_t alloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type()); gf = ggml_new_graph_custom(ctx_compute, LLAMA_TRAIN_MAX_NODES, true); gf->order = (enum ggml_cgraph_eval_order) order; gb = ggml_new_graph_custom(ctx_compute, LLAMA_TRAIN_MAX_NODES, true); @@ -1140,9 +1091,10 @@ int main(int argc, char ** argv) { &logits, tokens_input, target_probs, n_tokens, n_batch, params.common.use_flash, - params.common.use_checkpointing + params.common.use_checkpointing, + true ); - size_t max_compute_size = ggml_allocr_max_size(alloc) + tensor_alignment; + size_t 
max_compute_size = ggml_gallocr_get_buffer_size(alloc, 0); // FIXME: this will still allocate the buffer if (max_compute_size < best_compute_size) { best_compute_size = max_compute_size; best_order = gf->order; @@ -1157,9 +1109,8 @@ int main(int argc, char ** argv) { "invalid"); // allocate compute tensors - mem_compute_data.resize(max_compute_size); ctx_compute = ggml_init(ctx_compute_params); - alloc = ggml_allocr_new(mem_compute_data.data(), mem_compute_data.size(), tensor_alignment); + ggml_gallocr_t alloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type()); gf = ggml_new_graph_custom(ctx_compute, LLAMA_TRAIN_MAX_NODES, true); gf->order = best_order; gb = ggml_new_graph_custom(ctx_compute, LLAMA_TRAIN_MAX_NODES, true); @@ -1172,7 +1123,8 @@ int main(int argc, char ** argv) { &logits, tokens_input, target_probs, n_tokens, n_batch, params.common.use_flash, - params.common.use_checkpointing + params.common.use_checkpointing, + false ); std::vector train_tokens; diff --git a/ggml-alloc.c b/ggml-alloc.c index f9be6e1cb..c28c37c4f 100644 --- a/ggml-alloc.c +++ b/ggml-alloc.c @@ -17,397 +17,11 @@ //#define AT_PRINTF(...) fprintf(stderr, __VA_ARGS__) #define AT_PRINTF(...) -// TODO: GGML_PAD ? 
-static size_t aligned_offset(const void * buffer, size_t offset, size_t alignment) { - assert(alignment && !(alignment & (alignment - 1))); // power of 2 - size_t align = (alignment - (((uintptr_t)buffer + offset) % alignment)) % alignment; - return offset + align; -} -struct free_block { - void * addr; - size_t size; -}; - -struct ggml_tallocr { - struct ggml_backend_buffer * buffer; - bool buffer_owned; - void * base; - size_t alignment; - - int n_free_blocks; - struct free_block free_blocks[MAX_FREE_BLOCKS]; - - size_t max_size; - - bool measure; - -#ifdef GGML_ALLOCATOR_DEBUG - struct ggml_tensor * allocated_tensors[1024]; -#endif -}; - -#ifdef GGML_ALLOCATOR_DEBUG -static void add_allocated_tensor(ggml_tallocr_t alloc, struct ggml_tensor * tensor) { - for (int i = 0; i < 1024; i++) { - if (alloc->allocated_tensors[i] == NULL) { - alloc->allocated_tensors[i] = tensor; - return; - } - } - GGML_ASSERT(!"out of allocated_tensors"); -} -static void remove_allocated_tensor(ggml_tallocr_t alloc, struct ggml_tensor * tensor) { - for (int i = 0; i < 1024; i++) { - if (alloc->allocated_tensors[i] == tensor || - (alloc->allocated_tensors[i] != NULL && alloc->allocated_tensors[i]->data == tensor->data)) { - alloc->allocated_tensors[i] = NULL; - return; - } - } - printf("tried to free tensor %s not found\n", tensor->name); - GGML_ASSERT(!"tensor not found"); -} -#endif - -// check if a tensor is allocated by this buffer -static bool ggml_tallocr_is_own(ggml_tallocr_t alloc, const struct ggml_tensor * tensor) { - return tensor->buffer == alloc->buffer && (!tensor->view_src || tensor->view_src->buffer == alloc->buffer); -} - -static bool ggml_is_view(struct ggml_tensor * t) { +static bool ggml_is_view(const struct ggml_tensor * t) { return t->view_src != NULL; } -void ggml_tallocr_alloc(ggml_tallocr_t alloc, struct ggml_tensor * tensor) { - GGML_ASSERT(!ggml_is_view(tensor)); // views generally get data pointer from one of their sources - GGML_ASSERT(tensor->data == NULL); 
// avoid allocating tensor which already has memory allocated - - size_t size = ggml_backend_buffer_get_alloc_size(alloc->buffer, tensor); - size = aligned_offset(NULL, size, alloc->alignment); - - AT_PRINTF("%s: allocating %s (%zu bytes) - ", __func__, tensor->name, size); - - size_t max_avail = 0; - - // find the best fitting free block besides the last block - int best_fit_block = -1; - size_t best_fit_size = SIZE_MAX; - for (int i = 0; i < alloc->n_free_blocks - 1; i++) { - struct free_block * block = &alloc->free_blocks[i]; - max_avail = MAX(max_avail, block->size); - if (block->size >= size && block->size <= best_fit_size) { - best_fit_block = i; - best_fit_size = block->size; - } - } - - if (best_fit_block == -1) { - // the last block is our last resort - struct free_block * block = &alloc->free_blocks[alloc->n_free_blocks - 1]; - max_avail = MAX(max_avail, block->size); - if (block->size >= size) { - best_fit_block = alloc->n_free_blocks - 1; - } else { - fprintf(stderr, "%s: not enough space in the buffer to allocate %s (needed %zu, largest block available %zu)\n", - __func__, tensor->name, size, max_avail); - GGML_ASSERT(!"not enough space in the buffer"); - return; - } - } - - struct free_block * block = &alloc->free_blocks[best_fit_block]; - void * addr = block->addr; - block->addr = (char*)block->addr + size; - block->size -= size; - if (block->size == 0) { - // remove block if empty - alloc->n_free_blocks--; - for (int j = best_fit_block; j < alloc->n_free_blocks; j++) { - alloc->free_blocks[j] = alloc->free_blocks[j+1]; - } - } - - AT_PRINTF("block %d, addr %p\n", best_fit_block, addr); - - tensor->data = addr; - tensor->buffer = alloc->buffer; - if (!alloc->measure) { - ggml_backend_buffer_init_tensor(alloc->buffer, tensor); - } - -#ifdef GGML_ALLOCATOR_DEBUG - add_allocated_tensor(alloc, tensor); - size_t cur_max = (char*)addr - (char*)alloc->base + size; - if (cur_max > alloc->max_size) { - printf("max_size = %.2f MB: tensors: ", cur_max / 1024.0 
/ 1024.0); - for (int i = 0; i < 1024; i++) { - if (alloc->allocated_tensors[i]) { - printf("%s (%.2f MB) ", alloc->allocated_tensors[i]->name, ggml_nbytes(alloc->allocated_tensors[i]) / 1024.0 / 1024.0); - } - } - printf("\n"); - } -#endif - - alloc->max_size = MAX(alloc->max_size, (char*)addr - (char*)alloc->base + size); -} - -// this is a very naive implementation, but for our case the number of free blocks should be very small -static void ggml_tallocr_free_tensor(ggml_tallocr_t alloc, struct ggml_tensor * tensor) { - if (ggml_tallocr_is_own(alloc, tensor) == false) { - // the tensor was not allocated in this buffer - // this can happen because the graph allocator will try to free weights and other tensors from different buffers - // the easiest way to deal with this is just to ignore it - // AT_PRINTF("ignoring %s (their buffer: %p, our buffer: %p)\n", tensor->name, (void *)tensor->buffer, (void *)alloc->buffer); - return; - } - - void * ptr = tensor->data; - - size_t size = ggml_backend_buffer_get_alloc_size(alloc->buffer, tensor); - size = aligned_offset(NULL, size, alloc->alignment); - AT_PRINTF("%s: freeing %s at %p (%zu bytes) - n_free_blocks = %d\n", __func__, tensor->name, ptr, size, alloc->n_free_blocks); - -#ifdef GGML_ALLOCATOR_DEBUG - remove_allocated_tensor(alloc, tensor); -#endif - - // see if we can merge with an existing block - for (int i = 0; i < alloc->n_free_blocks; i++) { - struct free_block * block = &alloc->free_blocks[i]; - // check if ptr is at the end of the block - if ((char*)block->addr + block->size == ptr) { - block->size += size; - // check if we can merge with the next block - if (i < alloc->n_free_blocks - 1 && (char*)block->addr + block->size == alloc->free_blocks[i+1].addr) { - block->size += alloc->free_blocks[i+1].size; - alloc->n_free_blocks--; - for (int j = i+1; j < alloc->n_free_blocks; j++) { - alloc->free_blocks[j] = alloc->free_blocks[j+1]; - } - } - return; - } - // check if ptr is at the beginning of the block - if 
((char*)ptr + size == block->addr) { - block->addr = ptr; - block->size += size; - // check if we can merge with the previous block - if (i > 0 && (char*)alloc->free_blocks[i-1].addr + alloc->free_blocks[i-1].size == block->addr) { - alloc->free_blocks[i-1].size += block->size; - alloc->n_free_blocks--; - for (int j = i; j < alloc->n_free_blocks; j++) { - alloc->free_blocks[j] = alloc->free_blocks[j+1]; - } - } - return; - } - } - // otherwise, add a new block - GGML_ASSERT(alloc->n_free_blocks < MAX_FREE_BLOCKS && "out of free blocks"); - // insert the new block in the correct position to keep the array sorted by address (to make merging blocks faster) - int insert_pos = 0; - while (insert_pos < alloc->n_free_blocks && alloc->free_blocks[insert_pos].addr < ptr) { - insert_pos++; - } - // shift all blocks from insert_pos onward to make room for the new block - for (int i = alloc->n_free_blocks; i > insert_pos; i--) { - alloc->free_blocks[i] = alloc->free_blocks[i-1]; - } - // insert the new block - alloc->free_blocks[insert_pos].addr = ptr; - alloc->free_blocks[insert_pos].size = size; - alloc->n_free_blocks++; -} - -void ggml_tallocr_reset(ggml_tallocr_t alloc) { - alloc->n_free_blocks = 1; - size_t align_offset = aligned_offset(alloc->base, 0, alloc->alignment); - alloc->free_blocks[0].addr = (char *)alloc->base + align_offset; - - if (alloc->measure) { - alloc->free_blocks[0].size = SIZE_MAX/2; // restrict maximum size of a measure allocator to half size_t max to avoid overflows - } else { - alloc->free_blocks[0].size = ggml_backend_buffer_get_size(alloc->buffer) - align_offset; - ggml_backend_buffer_reset(alloc->buffer); - } -} - -ggml_tallocr_t ggml_tallocr_new(void * data, size_t size, size_t alignment) { - struct ggml_backend_buffer * buffer = ggml_backend_cpu_buffer_from_ptr(data, size); - - ggml_tallocr_t alloc = (ggml_tallocr_t)malloc(sizeof(struct ggml_tallocr)); - - *alloc = (struct ggml_tallocr) { - /*.buffer = */ buffer, - /*.buffer_owned = */ true, - 
/*.base = */ ggml_backend_buffer_get_base(buffer), - /*.alignment = */ alignment, - /*.n_free_blocks = */ 0, - /*.free_blocks = */ {{0}}, - /*.max_size = */ 0, - /*.measure = */ false, -#ifdef GGML_ALLOCATOR_DEBUG - /*.allocated_tensors = */ {0}, -#endif - }; - - ggml_tallocr_reset(alloc); - - return alloc; -} - -ggml_tallocr_t ggml_tallocr_new_measure(size_t alignment) { - ggml_tallocr_t alloc = ggml_tallocr_new((void *)0x1000, SIZE_MAX/2, alignment); - alloc->measure = true; - - return alloc; -} - -ggml_tallocr_t ggml_tallocr_new_measure_from_buft(struct ggml_backend_buffer_type * buft) { - // create a backend buffer to get the correct tensor allocation sizes - ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, 1); - - // TODO: move alloc initialization to a common ggml_tallocr_new_impl function - ggml_tallocr_t alloc = ggml_tallocr_new_from_buffer(buffer); - alloc->buffer_owned = true; - alloc->measure = true; - ggml_tallocr_reset(alloc); - return alloc; -} - -ggml_tallocr_t ggml_tallocr_new_measure_from_backend(struct ggml_backend * backend) { - return ggml_tallocr_new_measure_from_buft(ggml_backend_get_default_buffer_type(backend)); -} - -ggml_tallocr_t ggml_tallocr_new_from_buft(struct ggml_backend_buffer_type * buft, size_t size) { - // create a backend buffer to get the correct tensor allocation sizes - ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size); - ggml_tallocr_t alloc = ggml_tallocr_new_from_buffer(buffer); - alloc->buffer_owned = true; - return alloc; -} - -ggml_tallocr_t ggml_tallocr_new_from_backend(struct ggml_backend * backend, size_t size) { - return ggml_tallocr_new_from_buft(ggml_backend_get_default_buffer_type(backend), size); -} - -ggml_tallocr_t ggml_tallocr_new_from_buffer(struct ggml_backend_buffer * buffer) { - ggml_tallocr_t alloc = (ggml_tallocr_t)malloc(sizeof(struct ggml_tallocr)); - - *alloc = (struct ggml_tallocr) { - /*.buffer = */ buffer, - /*.buffer_owned = */ false, - /*.base = */ 
ggml_backend_buffer_get_base(buffer), - /*.alignment = */ ggml_backend_buffer_get_alignment(buffer), - /*.n_free_blocks = */ 0, - /*.free_blocks = */ {{0}}, - /*.max_size = */ 0, - /*.measure = */ false, -#ifdef GGML_ALLOCATOR_DEBUG - /*.allocated_tensors = */ {0}, -#endif - }; - - ggml_tallocr_reset(alloc); - - return alloc; -} - -struct ggml_backend_buffer * ggml_tallocr_get_buffer(ggml_tallocr_t alloc) { - return alloc->buffer; -} - -void ggml_tallocr_free(ggml_tallocr_t alloc) { - if (alloc == NULL) { - return; - } - - if (alloc->buffer_owned) { - ggml_backend_buffer_free(alloc->buffer); - } - free(alloc); -} - -bool ggml_tallocr_is_measure(ggml_tallocr_t alloc) { - return alloc->measure; -} - -size_t ggml_tallocr_max_size(ggml_tallocr_t alloc) { - // FIXME: changes in the tensor sizes compared to the measure graph may cause allocations to fail - // to avoid this, we add a 10% margin to the buffer size - return alloc->max_size + alloc->max_size/10; -} - -// graph allocator - -struct hash_node { - int n_children; - int n_views; -}; - -struct ggml_gallocr { - ggml_tallocr_t talloc; - struct ggml_hash_set hash_set; - struct hash_node * hash_values; - size_t hash_values_size; - ggml_tallocr_t * hash_allocs; - int * parse_seq; - int parse_seq_len; -}; - -ggml_gallocr_t ggml_gallocr_new(void) { - ggml_gallocr_t galloc = (ggml_gallocr_t)malloc(sizeof(struct ggml_gallocr)); - - *galloc = (struct ggml_gallocr) { - /*.talloc = */ NULL, - /*.hash_set = */ {0}, - /*.hash_values = */ NULL, - /*.hash_values_size = */ 0, - /*.hash_allocs = */ NULL, - /*.parse_seq = */ NULL, - /*.parse_seq_len = */ 0, - }; - - return galloc; -} - -void ggml_gallocr_free(ggml_gallocr_t galloc) { - if (galloc == NULL) { - return; - } - - if (galloc->hash_set.keys != NULL) { - free(galloc->hash_set.keys); - } - if (galloc->hash_values != NULL) { - free(galloc->hash_values); - } - if (galloc->hash_allocs != NULL) { - free(galloc->hash_allocs); - } - if (galloc->parse_seq != NULL) { - 
free(galloc->parse_seq); - } - free(galloc); -} - -void ggml_gallocr_set_parse_seq(ggml_gallocr_t galloc, const int * list, int n) { - free(galloc->parse_seq); - galloc->parse_seq = malloc(sizeof(int) * n); - - for (int i = 0; i < n; i++) { - galloc->parse_seq[i] = list[i]; - } - galloc->parse_seq_len = n; -} - -static struct hash_node * hash_get(ggml_gallocr_t galloc, struct ggml_tensor * t) { - size_t i = ggml_hash_find_or_insert(galloc->hash_set, t); - return &galloc->hash_values[i]; -} - static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) { if (a->type != b->type) { return false; @@ -447,106 +61,511 @@ static bool ggml_op_can_inplace(enum ggml_op op) { } } -static ggml_tallocr_t node_tallocr(ggml_gallocr_t galloc, struct ggml_tensor * node) { - if (galloc->talloc != NULL) { - return galloc->talloc; - } - - return galloc->hash_allocs[ggml_hash_find_or_insert(galloc->hash_set, node)]; +// TODO: GGML_PAD ? +static size_t aligned_offset(const void * buffer, size_t offset, size_t alignment) { + assert(alignment && !(alignment & (alignment - 1))); // power of 2 + size_t align = (alignment - (((uintptr_t)buffer + offset) % alignment)) % alignment; + return offset + align; } -static void init_view(ggml_gallocr_t galloc, struct ggml_tensor * view, bool update_backend) { - ggml_tallocr_t alloc = node_tallocr(galloc, view); +// tallocr +struct ggml_tallocr { + ggml_backend_buffer_t buffer; + void * base; + size_t alignment; + size_t offset; +}; - GGML_ASSERT(view->view_src != NULL && view->view_src->data != NULL); - if (update_backend) { - view->backend = view->view_src->backend; +ggml_tallocr_t ggml_tallocr_new(ggml_backend_buffer_t buffer) { + ggml_tallocr_t talloc = malloc(sizeof(struct ggml_tallocr)); + if (talloc == NULL) { + return NULL; } - // views are initialized in the alloc buffer rather than the view_src buffer - view->buffer = alloc->buffer; - view->data = (char *)view->view_src->data + view->view_offs; - 
assert(ggml_tallocr_is_measure(alloc) || !view->buffer || view->buffer->buft == alloc->buffer->buft); + void * base = ggml_backend_buffer_get_base(buffer); + size_t align = ggml_backend_buffer_get_alignment(buffer); - if (!alloc->measure) { - ggml_backend_buffer_init_tensor(alloc->buffer, view); - } + assert(align && !(align & (align - 1))); // power of 2 + + *talloc = (struct ggml_tallocr) { + /*.buffer = */ buffer, + /*.base = */ base, + /*.alignment = */ align, + /*.offset = */ aligned_offset(base, 0, align), + }; + return talloc; } -static void allocate_node(ggml_gallocr_t galloc, struct ggml_tensor * node) { - ggml_tallocr_t alloc = node_tallocr(galloc, node); +void ggml_tallocr_free(ggml_tallocr_t talloc) { + free(talloc); +} - if (node->data == NULL) { - if (ggml_is_view(node)) { - init_view(galloc, node, true); +void ggml_tallocr_alloc(ggml_tallocr_t talloc, struct ggml_tensor * tensor) { + size_t size = ggml_backend_buffer_get_alloc_size(talloc->buffer, tensor); + size = GGML_PAD(size, talloc->alignment); + + if (talloc->offset + size > ggml_backend_buffer_get_size(talloc->buffer)) { + fprintf(stderr, "%s: not enough space in the buffer to allocate %s (needed %zu, available %zu)\n", + __func__, tensor->name, size, ggml_backend_buffer_get_size(talloc->buffer) - talloc->offset); + GGML_ASSERT(!"not enough space in the buffer"); + return; + } + + void * addr = (char *)ggml_backend_buffer_get_base(talloc->buffer) + talloc->offset; + talloc->offset += size; + + assert(((uintptr_t)addr % talloc->alignment) == 0); + + ggml_backend_tensor_alloc(talloc->buffer, tensor, addr); +} + +// dynamic tensor allocator + +struct free_block { + size_t offset; + size_t size; +}; + +struct ggml_dyn_tallocr { + size_t alignment; + int n_free_blocks; + struct free_block free_blocks[MAX_FREE_BLOCKS]; + size_t max_size; + +#ifdef GGML_ALLOCATOR_DEBUG + struct { + const struct ggml_tensor * tensor; + size_t offset; + } allocated_tensors[1024]; +#endif +}; + +#ifdef 
GGML_ALLOCATOR_DEBUG +static void add_allocated_tensor(struct ggml_dyn_tallocr * alloc, size_t offset, const struct ggml_tensor * tensor) { + for (int i = 0; i < 1024; i++) { + if (alloc->allocated_tensors[i].tensor == NULL) { + alloc->allocated_tensors[i].tensor = tensor; + alloc->allocated_tensors[i].offset = offset; + return; + } + } + GGML_ASSERT(!"out of allocated_tensors"); +} +static void remove_allocated_tensor(struct ggml_dyn_tallocr * alloc, size_t offset, const struct ggml_tensor * tensor) { + for (int i = 0; i < 1024; i++) { + if (alloc->allocated_tensors[i].offset == offset) { + alloc->allocated_tensors[i].tensor = NULL; + return; + } + } + fprintf(stderr, "tried to free tensor %s not found\n", tensor->name); + GGML_ASSERT(!"tensor not found"); +} +#endif + +static size_t ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * alloc, size_t size, const struct ggml_tensor * tensor) { + size = aligned_offset(NULL, size, alloc->alignment); + + AT_PRINTF("%s: allocating %s (%zu bytes) - ", __func__, tensor->name, size); + + size_t max_avail = 0; + + // find the best fitting free block besides the last block + int best_fit_block = -1; + size_t best_fit_size = SIZE_MAX; + for (int i = 0; i < alloc->n_free_blocks - 1; i++) { + struct free_block * block = &alloc->free_blocks[i]; + max_avail = MAX(max_avail, block->size); + if (block->size >= size && block->size <= best_fit_size) { + best_fit_block = i; + best_fit_size = block->size; + } + } + + if (best_fit_block == -1) { + // the last block is our last resort + struct free_block * block = &alloc->free_blocks[alloc->n_free_blocks - 1]; + max_avail = MAX(max_avail, block->size); + if (block->size >= size) { + best_fit_block = alloc->n_free_blocks - 1; } else { - // see if we can reuse a parent's buffer (inplace) - if (ggml_op_can_inplace(node->op)) { - for (int i = 0; i < GGML_MAX_SRC; i++) { - struct ggml_tensor * parent = node->src[i]; - if (parent == NULL) { - break; - } + // this should never happen + 
fprintf(stderr, "%s: not enough space in the buffer to allocate %zu bytes, largest block available %zu bytes\n", + __func__, size, max_avail); + GGML_ASSERT(!"not enough space in the buffer"); + GGML_UNREACHABLE(); + } + } - // if the node's data is external, then we cannot re-use it - if (ggml_tallocr_is_own(alloc, parent) == false) { - AT_PRINTF("not reusing parent %s for %s as %p is external\n", parent->name, node->name, parent->data); - continue; - } + struct free_block * block = &alloc->free_blocks[best_fit_block]; + size_t offset = block->offset; + block->offset = offset + size; + block->size -= size; + if (block->size == 0) { + // remove block if empty + alloc->n_free_blocks--; + for (int j = best_fit_block; j < alloc->n_free_blocks; j++) { + alloc->free_blocks[j] = alloc->free_blocks[j+1]; + } + } - struct hash_node * p_hn = hash_get(galloc, parent); - if (parent->data != NULL && p_hn->n_children == 1 && p_hn->n_views == 0 && ggml_are_same_layout(node, parent)) { - if (ggml_is_view(parent)) { - struct ggml_tensor * view_src = parent->view_src; - struct hash_node * view_src_hn = hash_get(galloc, view_src); - if (view_src_hn->n_views == 1 && view_src_hn->n_children == 0 && view_src->data == parent->data) { - // TODO: the offset of the view parent must be kept to ensure that the op doesn't overwrite - // the parent's data that it will need later (same layout requirement). the problem is that then - // we cannot free the tensor because the original address of the allocation is lost. 
- // adding a view_src pointer to the tensor would solve this and simplify the code dealing with views - // for now, we only reuse the parent's data if the offset is zero (view_src->data == parent->data) - AT_PRINTF("reusing view parent %s (%s) for %s\n", parent->name, view_src->name, node->name); - node->view_src = view_src; - view_src_hn->n_views += 1; - init_view(galloc, node, false); - return; - } - } else { - AT_PRINTF("reusing parent %s for %s\n", parent->name, node->name); - node->view_src = parent; - p_hn->n_views += 1; - init_view(galloc, node, false); + AT_PRINTF("block %d, offset %zu\n", best_fit_block, offset); + +#ifdef GGML_ALLOCATOR_DEBUG + add_allocated_tensor(alloc, offset, tensor); + size_t cur_max = offset + size; + if (cur_max > alloc->max_size) { + // sort allocated_tensors by offset + for (int i = 0; i < 1024; i++) { + for (int j = i + 1; j < 1024; j++) { + if (alloc->allocated_tensors[i].offset > alloc->allocated_tensors[j].offset) { + const struct ggml_tensor * tmp_tensor = alloc->allocated_tensors[i].tensor; + size_t tmp_offset = alloc->allocated_tensors[i].offset; + alloc->allocated_tensors[i].tensor = alloc->allocated_tensors[j].tensor; + alloc->allocated_tensors[i].offset = alloc->allocated_tensors[j].offset; + alloc->allocated_tensors[j].tensor = tmp_tensor; + alloc->allocated_tensors[j].offset = tmp_offset; + } + } + } + fprintf(stderr, "max_size = %.2f MB: tensors: ", cur_max / 1024.0 / 1024.0); + for (int i = 0; i < 1024; i++) { + if (alloc->allocated_tensors[i].tensor) { + fprintf(stderr, "%s [%zx-%zx] (%.2f MB) ", alloc->allocated_tensors[i].tensor->name, + alloc->allocated_tensors[i].offset, + alloc->allocated_tensors[i].offset + ggml_nbytes(alloc->allocated_tensors[i].tensor), + ggml_nbytes(alloc->allocated_tensors[i].tensor) / 1024.0 / 1024.0); + } + } + fprintf(stderr, "\n"); + } +#endif + + alloc->max_size = MAX(alloc->max_size, offset + size); + + return offset; + + GGML_UNUSED(tensor); +} + +// this is a very naive 
implementation, but for our case the number of free blocks should be very small +static void ggml_dyn_tallocr_free_tensor(struct ggml_dyn_tallocr * alloc, size_t offset, size_t size, const struct ggml_tensor * tensor) { + size = aligned_offset(NULL, size, alloc->alignment); + + AT_PRINTF("%s: freeing %s at %zu (%zu bytes) - n_free_blocks = %d\n", __func__, tensor->name, offset, size, alloc->n_free_blocks); + +#ifdef GGML_ALLOCATOR_DEBUG + remove_allocated_tensor(alloc, offset, tensor); +#endif + + // see if we can merge with an existing block + for (int i = 0; i < alloc->n_free_blocks; i++) { + struct free_block * block = &alloc->free_blocks[i]; + // check if ptr is at the end of the block + if (block->offset + block->size == offset) { + block->size += size; + // check if we can merge with the next block + if (i < alloc->n_free_blocks - 1 && block->offset + block->size == alloc->free_blocks[i+1].offset) { + block->size += alloc->free_blocks[i+1].size; + alloc->n_free_blocks--; + for (int j = i+1; j < alloc->n_free_blocks; j++) { + alloc->free_blocks[j] = alloc->free_blocks[j+1]; + } + } + return; + } + // check if ptr is at the beginning of the block + if (offset + size == block->offset) { + block->offset = offset; + block->size += size; + // check if we can merge with the previous block + if (i > 0 && alloc->free_blocks[i-1].offset + alloc->free_blocks[i-1].size == block->offset) { + alloc->free_blocks[i-1].size += block->size; + alloc->n_free_blocks--; + for (int j = i; j < alloc->n_free_blocks; j++) { + alloc->free_blocks[j] = alloc->free_blocks[j+1]; + } + } + return; + } + } + // otherwise, add a new block + GGML_ASSERT(alloc->n_free_blocks < MAX_FREE_BLOCKS && "out of free blocks"); + // insert the new block in the correct position to keep the array sorted by address (to make merging blocks faster) + int insert_pos = 0; + while (insert_pos < alloc->n_free_blocks && alloc->free_blocks[insert_pos].offset < offset) { + insert_pos++; + } + // shift all blocks 
from insert_pos onward to make room for the new block + for (int i = alloc->n_free_blocks; i > insert_pos; i--) { + alloc->free_blocks[i] = alloc->free_blocks[i-1]; + } + // insert the new block + alloc->free_blocks[insert_pos].offset = offset; + alloc->free_blocks[insert_pos].size = size; + alloc->n_free_blocks++; + + GGML_UNUSED(tensor); +} + +static void ggml_dyn_tallocr_reset(struct ggml_dyn_tallocr * alloc) { + alloc->n_free_blocks = 1; + alloc->free_blocks[0].offset = 0; + alloc->free_blocks[0].size = SIZE_MAX/2; // restrict maximum size of a measure allocator to half size_t max to avoid overflows + alloc->max_size = 0; +} + +static struct ggml_dyn_tallocr * ggml_dyn_tallocr_new(size_t alignment) { + struct ggml_dyn_tallocr * alloc = (struct ggml_dyn_tallocr *)malloc(sizeof(struct ggml_dyn_tallocr)); + + *alloc = (struct ggml_dyn_tallocr) { + /*.alignment = */ alignment, + /*.n_free_blocks = */ 0, + /*.free_blocks = */ {{0}}, + /*.max_size = */ 0, +#ifdef GGML_ALLOCATOR_DEBUG + /*.allocated_tensors = */ {{0}}, +#endif + }; + + ggml_dyn_tallocr_reset(alloc); + + return alloc; +} + +static void ggml_dyn_tallocr_free(struct ggml_dyn_tallocr * alloc) { + free(alloc); +} + +static size_t ggml_dyn_tallocr_max_size(struct ggml_dyn_tallocr * alloc) { + return alloc->max_size; +} + + +///////////////////////////////////// + +// graph allocator + +struct hash_node { + int n_children; + int n_views; + int buffer_id; + size_t offset; // offset within the buffer + bool allocated; +}; + +// +struct tensor_alloc { + size_t offset; + size_t size_max; // 0 = pre-allocated, unused, or view +}; + +struct node_alloc { + int buffer_id; + struct tensor_alloc dst; + struct tensor_alloc src[GGML_MAX_SRC]; +}; + +struct ggml_gallocr { + ggml_backend_buffer_type_t * bufts; // [n_buffers] + ggml_backend_buffer_t * buffers; // [n_buffers] + struct ggml_dyn_tallocr ** buf_tallocs; // [n_buffers] + int n_buffers; + + struct ggml_hash_set hash_set; + struct hash_node * hash_values; // 
[hash_set.size] + + struct node_alloc * node_allocs; // [n_nodes] + int n_nodes; +}; + +ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs) { + ggml_gallocr_t galloc = (ggml_gallocr_t)calloc(sizeof(struct ggml_gallocr), 1); + GGML_ASSERT(galloc != NULL); + + galloc->bufts = calloc(sizeof(ggml_backend_buffer_type_t) * n_bufs, 1); + GGML_ASSERT(galloc->bufts != NULL); + + galloc->buffers = calloc(sizeof(ggml_backend_buffer_t) * n_bufs, 1); + GGML_ASSERT(galloc->buffers != NULL); + + galloc->buf_tallocs = calloc(sizeof(struct ggml_dyn_tallocr *) * n_bufs, 1); + GGML_ASSERT(galloc->buf_tallocs != NULL); + + for (int i = 0; i < n_bufs; i++) { + galloc->bufts[i] = bufts[i]; + galloc->buffers[i] = NULL; + size_t alignment = ggml_backend_buft_get_alignment(bufts[i]); + galloc->buf_tallocs[i] = ggml_dyn_tallocr_new(alignment); + } + galloc->n_buffers = n_bufs; + + return galloc; +} + +ggml_gallocr_t ggml_gallocr_new(ggml_backend_buffer_type_t buft) { + return ggml_gallocr_new_n(&buft, 1); +} + +void ggml_gallocr_free(ggml_gallocr_t galloc) { + if (galloc == NULL) { + return; + } + + for (int i = 0; i < galloc->n_buffers; i++) { + if (galloc->buffers != NULL) { + ggml_backend_buffer_free(galloc->buffers[i]); + } + if (galloc->buf_tallocs != NULL) { + ggml_dyn_tallocr_free(galloc->buf_tallocs[i]); + } + } + + free(galloc->hash_set.keys); + free(galloc->hash_values); + free(galloc->bufts); + free(galloc->buffers); + free(galloc->buf_tallocs); + free(galloc->node_allocs); + free(galloc); +} + +typedef struct ggml_gallocr * ggml_gallocr_t; + +static struct hash_node * ggml_gallocr_hash_get(ggml_gallocr_t galloc, struct ggml_tensor * t) { + size_t i = ggml_hash_find_or_insert(galloc->hash_set, t); + return &galloc->hash_values[i]; +} + +static bool ggml_gallocr_is_own(ggml_gallocr_t galloc, struct ggml_tensor * t) { + return ggml_gallocr_hash_get(galloc, t)->allocated; +} + +static void ggml_gallocr_set_node_offset(ggml_gallocr_t galloc, struct 
ggml_tensor * node, int buffer_id, size_t offset) { + struct hash_node * hn = ggml_gallocr_hash_get(galloc, node); + hn->buffer_id = buffer_id; + hn->offset = offset; + hn->allocated = true; +} + +static bool ggml_gallocr_is_allocated(ggml_gallocr_t galloc, struct ggml_tensor * t) { + return t->data != NULL || ggml_gallocr_hash_get(galloc, t)->allocated; +} + +static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor * node, int buffer_id) { + struct hash_node * hn = ggml_gallocr_hash_get(galloc, node); + + if (!ggml_gallocr_is_allocated(galloc, node) && !ggml_is_view(node)) { + hn->allocated = true; + assert(hn->offset == 0); + + // try to reuse a parent's buffer (inplace) + if (ggml_op_can_inplace(node->op)) { + for (int i = 0; i < GGML_MAX_SRC; i++) { + struct ggml_tensor * parent = node->src[i]; + if (parent == NULL) { + break; + } + + // if the node's data is external, then we cannot re-use it + if (!ggml_gallocr_is_own(galloc, parent)) { + AT_PRINTF("not reusing parent %s for %s as %p is external\n", parent->name, node->name, parent->data); + continue; + } + + // outputs cannot be reused + if (parent->flags & GGML_TENSOR_FLAG_OUTPUT || (parent->view_src != NULL && parent->view_src->flags & GGML_TENSOR_FLAG_OUTPUT)) { + AT_PRINTF("not reusing parent %s for %s as it is an output\n", parent->name, node->name); + continue; + } + + if (!ggml_are_same_layout(node, parent)) { + AT_PRINTF("not reusing parent %s for %s as layouts are different\n", parent->name, node->name); + continue; + } + + struct hash_node * p_hn = ggml_gallocr_hash_get(galloc, parent); + if (p_hn->n_children == 1 && p_hn->n_views == 0) { + if (ggml_is_view(parent)) { + struct ggml_tensor * view_src = parent->view_src; + struct hash_node * view_src_hn = ggml_gallocr_hash_get(galloc, view_src); + if (view_src_hn->n_views == 1 && view_src_hn->n_children == 0 && view_src->data == parent->data) { + AT_PRINTF("reusing view parent %s (%s) for %s\n", parent->name, view_src->name, 
node->name); + assert(view_src_hn->offset == p_hn->offset); + hn->buffer_id = p_hn->buffer_id; + hn->offset = p_hn->offset; + p_hn->allocated = false; // avoid freeing the parent + view_src_hn->allocated = false; return; } + } else { + AT_PRINTF("reusing parent %s for %s\n", parent->name, node->name); + hn->buffer_id = p_hn->buffer_id; + hn->offset = p_hn->offset; + p_hn->allocated = false; // avoid freeing the parent + return; } } } - ggml_tallocr_alloc(alloc, node); } + // allocate tensor from the buffer + struct ggml_dyn_tallocr * alloc = galloc->buf_tallocs[buffer_id]; + ggml_backend_buffer_type_t buft = galloc->bufts[buffer_id]; + size_t size = ggml_backend_buft_get_alloc_size(buft, node); + size_t offset = ggml_dyn_tallocr_alloc(alloc, size, node); + hn->buffer_id = buffer_id; + hn->offset = offset; + return; } } -static void free_node(ggml_gallocr_t galloc, struct ggml_tensor * node) { - ggml_tallocr_t alloc = node_tallocr(galloc, node); +static void ggml_gallocr_free_node(ggml_gallocr_t galloc, struct ggml_tensor * node, int buffer_id) { + // graph outputs are never freed + if (node->flags & GGML_TENSOR_FLAG_OUTPUT) { + AT_PRINTF("not freeing output %s\n", node->name); + return; + } - ggml_tallocr_free_tensor(alloc, node); + struct ggml_dyn_tallocr * alloc = galloc->buf_tallocs[buffer_id]; + ggml_backend_buffer_type_t buft = galloc->bufts[buffer_id]; + struct hash_node * hn = ggml_gallocr_hash_get(galloc, node); + size_t offset = hn->offset; + size_t size = ggml_backend_buft_get_alloc_size(buft, node); + ggml_dyn_tallocr_free_tensor(alloc, offset, size, node); + hn->allocated = false; } -static void ggml_tallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgraph * gf) { - const int * parse_seq = galloc->parse_seq; - int parse_seq_len = galloc->parse_seq_len; +static int get_node_buffer_id(const int * node_buffer_ids, int i) { + return node_buffer_ids ? 
node_buffer_ids[i] : 0; +} + +static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids) { + // clear hash tables + memset(galloc->hash_set.keys, 0, galloc->hash_set.size * sizeof(struct ggml_tensor *)); + memset(galloc->hash_values, 0, galloc->hash_set.size * sizeof(struct hash_node)); + + // allocate all graph inputs first to avoid overwriting them + for (int i = 0; i < graph->n_nodes; i++) { + if (graph->nodes[i]->flags & GGML_TENSOR_FLAG_INPUT) { + ggml_gallocr_allocate_node(galloc, graph->nodes[i], get_node_buffer_id(node_buffer_ids, i)); + } + for (int j = 0; j < GGML_MAX_SRC; j++) { + if (graph->nodes[i]->src[j] == NULL) { + break; + } + if (graph->nodes[i]->src[j]->flags & GGML_TENSOR_FLAG_INPUT) { + ggml_gallocr_allocate_node(galloc, graph->nodes[i]->src[j], get_node_buffer_id(node_buffer_ids, i)); + } + } + } // count number of children and views - for (int i = 0; i < gf->n_nodes; i++) { - struct ggml_tensor * node = gf->nodes[i]; + for (int i = 0; i < graph->n_nodes; i++) { + struct ggml_tensor * node = graph->nodes[i]; if (ggml_is_view(node)) { struct ggml_tensor * view_src = node->view_src; - hash_get(galloc, view_src)->n_views += 1; - if (node->buffer == NULL && node->data != NULL) { - // view of a pre-allocated tensor, didn't call init_view() yet - init_view(galloc, node, true); - } + ggml_gallocr_hash_get(galloc, view_src)->n_views += 1; } for (int j = 0; j < GGML_MAX_SRC; j++) { @@ -554,227 +573,283 @@ static void ggml_tallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr if (parent == NULL) { break; } - hash_get(galloc, parent)->n_children += 1; - if (ggml_is_view(parent) && parent->buffer == NULL && parent->data != NULL) { - init_view(galloc, parent, true); - } + ggml_gallocr_hash_get(galloc, parent)->n_children += 1; } } // allocate tensors - // if we have parse_seq then we allocate nodes following the list, and we only free nodes at barriers - int last_barrier_pos = 0; - 
int n_nodes = parse_seq_len ? parse_seq_len : gf->n_nodes; + for (int i = 0; i < graph->n_nodes; i++) { + struct ggml_tensor * node = graph->nodes[i]; + int buffer_id = get_node_buffer_id(node_buffer_ids, i); - for (int ind = 0; ind < n_nodes; ind++) { - // allocate a node if there is no parse_seq or this is not a barrier - if (parse_seq_len == 0 || parse_seq[ind] != -1) { - int i = parse_seq_len ? parse_seq[ind] : ind; - struct ggml_tensor * node = gf->nodes[i]; - - // allocate parents (leafs) - for (int j = 0; j < GGML_MAX_SRC; j++) { - struct ggml_tensor * parent = node->src[j]; - if (parent == NULL) { - break; - } - allocate_node(galloc, parent); + // allocate parents (only leafs need to be allocated at this point) + for (int j = 0; j < GGML_MAX_SRC; j++) { + struct ggml_tensor * parent = node->src[j]; + if (parent == NULL) { + break; } - - // allocate node - allocate_node(galloc, node); - - AT_PRINTF("exec: %s (%s) <= ", ggml_op_name(node->op), node->name); - for (int j = 0; j < GGML_MAX_SRC; j++) { - struct ggml_tensor * parent = node->src[j]; - if (parent == NULL) { - break; - } - AT_PRINTF("%s", parent->name); - if (j < GGML_MAX_SRC - 1 && node->src[j + 1] != NULL) { - AT_PRINTF(", "); - } - } - AT_PRINTF("\n"); + ggml_gallocr_allocate_node(galloc, parent, buffer_id); } + // allocate node + ggml_gallocr_allocate_node(galloc, node, buffer_id); + + AT_PRINTF("exec: %s (%s) <= ", ggml_op_desc(node), node->name); + for (int j = 0; j < GGML_MAX_SRC; j++) { + struct ggml_tensor * parent = node->src[j]; + if (parent == NULL) { + break; + } + AT_PRINTF("%s", parent->name); + if (j < GGML_MAX_SRC - 1 && node->src[j + 1] != NULL) { + AT_PRINTF(", "); + } + } + AT_PRINTF("\n"); + // update parents - // update immediately if there is no parse_seq - // update only at barriers if there is parse_seq - if ((parse_seq_len == 0) || parse_seq[ind] == -1) { - int update_start = parse_seq_len ? last_barrier_pos : ind; - int update_end = parse_seq_len ? 
ind : ind + 1; - for (int i = update_start; i < update_end; i++) { - int node_i = parse_seq_len ? parse_seq[i] : i; - struct ggml_tensor * node = gf->nodes[node_i]; + for (int j = 0; j < GGML_MAX_SRC; j++) { + struct ggml_tensor * parent = node->src[j]; + if (parent == NULL) { + break; + } + struct hash_node * p_hn = ggml_gallocr_hash_get(galloc, parent); + p_hn->n_children -= 1; - for (int j = 0; j < GGML_MAX_SRC; j++) { - struct ggml_tensor * parent = node->src[j]; - if (parent == NULL) { - break; - } - struct hash_node * p_hn = hash_get(galloc, parent); - p_hn->n_children -= 1; - - //AT_PRINTF("parent %s: %d children, %d views\n", parent->name, parent->n_children, parent->n_views); - - if (p_hn->n_children == 0 && p_hn->n_views == 0) { - if (ggml_is_view(parent)) { - struct ggml_tensor * view_src = parent->view_src; - struct hash_node * view_src_hn = hash_get(galloc, view_src); - view_src_hn->n_views -= 1; - AT_PRINTF("view_src %s: %d children, %d views\n", view_src->name, view_src_hn->n_children, view_src_hn->n_views); - if (view_src_hn->n_views == 0 && view_src_hn->n_children == 0) { - free_node(galloc, view_src); - } - } - else { - free_node(galloc, parent); - } + AT_PRINTF("parent %s: %d children, %d views, allocated: %d\n", + parent->name, p_hn->n_children, p_hn->n_views, p_hn->allocated); + + if (p_hn->n_children == 0 && p_hn->n_views == 0) { + if (ggml_is_view(parent)) { + struct ggml_tensor * view_src = parent->view_src; + struct hash_node * view_src_hn = ggml_gallocr_hash_get(galloc, view_src); + view_src_hn->n_views -= 1; + AT_PRINTF("view_src %s: %d children, %d views\n", + view_src->name, view_src_hn->n_children, view_src_hn->n_views); + if (view_src_hn->n_views == 0 && view_src_hn->n_children == 0 && view_src_hn->allocated) { + ggml_gallocr_free_node(galloc, view_src, buffer_id); } } + else if (p_hn->allocated) { + ggml_gallocr_free_node(galloc, parent, buffer_id); + } } AT_PRINTF("\n"); - if (parse_seq_len) { - last_barrier_pos = ind + 1; + } + } 
+} + +bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids) { + size_t hash_size = graph->visited_hash_table.size; + + // initialize hash table + if (galloc->hash_set.size < hash_size) { + free(galloc->hash_set.keys); + free(galloc->hash_values); + galloc->hash_set.size = hash_size; + galloc->hash_set.keys = calloc(sizeof(struct ggml_tensor *), hash_size); + galloc->hash_values = calloc(sizeof(struct hash_node), hash_size); + GGML_ASSERT(galloc->hash_set.keys != NULL); + GGML_ASSERT(galloc->hash_values != NULL); + } else { + // reset hash table + memset(galloc->hash_set.keys, 0, sizeof(struct ggml_tensor *) * galloc->hash_set.size); + memset(galloc->hash_values, 0, sizeof(struct hash_node) * galloc->hash_set.size); + } + + // reset allocators + for (int i = 0; i < galloc->n_buffers; i++) { + ggml_dyn_tallocr_reset(galloc->buf_tallocs[i]); + } + + // allocate in hash table + ggml_gallocr_alloc_graph_impl(galloc, graph, node_buffer_ids); + + // set the node_allocs from the hash table + if (galloc->n_nodes < graph->n_nodes) { + free(galloc->node_allocs); + galloc->node_allocs = calloc(sizeof(struct node_alloc), graph->n_nodes); + GGML_ASSERT(galloc->node_allocs != NULL); + } + galloc->n_nodes = graph->n_nodes; + for (int i = 0; i < graph->n_nodes; i++) { + struct ggml_tensor * node = graph->nodes[i]; + struct node_alloc * node_alloc = &galloc->node_allocs[i]; + node_alloc->buffer_id = get_node_buffer_id(node_buffer_ids, i); + if (node->view_src || node->data) { + node_alloc->dst.offset = SIZE_MAX; + node_alloc->dst.size_max = 0; + } else { + struct hash_node * hn = ggml_gallocr_hash_get(galloc, node); + node_alloc->dst.offset = hn->offset; + node_alloc->dst.size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], node); + } + for (int j = 0; j < GGML_MAX_SRC; j++) { + struct ggml_tensor * src = node->src[j]; + if (!src || src->view_src || src->data) { + node_alloc->src[j].offset = SIZE_MAX; + 
node_alloc->src[j].size_max = 0; + } else { + struct hash_node * hn = ggml_gallocr_hash_get(galloc, src); + node_alloc->src[j].offset = hn->offset; + node_alloc->src[j].size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], src); } } } -} -size_t ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, ggml_tallocr_t talloc, struct ggml_cgraph * graph) { - size_t hash_size = graph->visited_hash_table.size; + // reallocate buffers if needed + for (int i = 0; i < galloc->n_buffers; i++) { + size_t cur_size = galloc->buffers[i] ? ggml_backend_buffer_get_size(galloc->buffers[i]) : 0; + size_t new_size = ggml_dyn_tallocr_max_size(galloc->buf_tallocs[i]); - // check if the hash table is initialized and large enough - if (galloc->hash_set.size < hash_size) { - if (galloc->hash_set.keys != NULL) { - free(galloc->hash_set.keys); + if (new_size > cur_size) { +#ifndef NDEBUG + fprintf(stderr, "%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0); +#endif + ggml_backend_buffer_free(galloc->buffers[i]); + galloc->buffers[i] = ggml_backend_buft_alloc_buffer(galloc->bufts[i], new_size); + if (galloc->buffers[i] == NULL) { + fprintf(stderr, "%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), new_size); + return false; + } } - if (galloc->hash_values != NULL) { - free(galloc->hash_values); + } + + return true; +} + +bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph *graph) { + return ggml_gallocr_reserve_n(galloc, graph, NULL); +} + +static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor * node, struct node_alloc * node_alloc, struct tensor_alloc * tensor_alloc) { + assert(node->data || node->view_src || ggml_backend_buffer_get_alloc_size(galloc->buffers[node_alloc->buffer_id], node) <= tensor_alloc->size_max); + + if (node->view_src != NULL) { + if (node->buffer 
== NULL) { + assert(tensor_alloc->offset == SIZE_MAX); + if (node->view_src->buffer == NULL) { + // this tensor was allocated without ggml-backend + return; + } + ggml_backend_view_init(galloc->buffers[node_alloc->buffer_id], node); } - galloc->hash_set.keys = malloc(sizeof(struct ggml_tensor *) * hash_size); - galloc->hash_set.size = hash_size; - galloc->hash_values = malloc(sizeof(struct hash_node) * hash_size); + } else { + if (node->data == NULL) { + assert(tensor_alloc->offset != SIZE_MAX); + assert(ggml_backend_buffer_get_alloc_size(galloc->buffers[node_alloc->buffer_id], node) <= tensor_alloc->size_max); + void * base = ggml_backend_buffer_get_base(galloc->buffers[node_alloc->buffer_id]); + void * addr = (char *)base + tensor_alloc->offset; + ggml_backend_tensor_alloc(galloc->buffers[node_alloc->buffer_id], node, addr); + } else { + if (node->buffer == NULL) { + // this tensor was allocated without ggml-backend + return; + } + +#ifndef NDEBUG + size_t offset = + (char *)node->data - + (char *)ggml_backend_buffer_get_base(node->buffer); + size_t size = ggml_backend_buffer_get_alloc_size(node->buffer, node); + assert(tensor_alloc->offset == SIZE_MAX || offset == tensor_alloc->offset); + assert(tensor_alloc->offset == SIZE_MAX || size <= tensor_alloc->size_max); +#endif + } + } +} + +static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_tensor * node, struct node_alloc * nalloc, struct tensor_alloc * talloc) { + ggml_backend_buffer_type_t buft = galloc->bufts[nalloc->buffer_id]; + size_t node_size = (node->data || node->view_src) ? 
0 : ggml_backend_buft_get_alloc_size(buft, node); + return talloc->size_max >= node_size; +} + +static bool ggml_gallocr_needs_realloc(ggml_gallocr_t galloc, struct ggml_cgraph * graph) { + if (galloc->n_nodes != graph->n_nodes) { +#ifndef NDEBUG + fprintf(stderr, "%s: graph has different number of nodes\n", __func__); +#endif + return true; } - // reset hash table - memset(galloc->hash_set.keys, 0, sizeof(struct ggml_tensor *) * hash_size); - memset(galloc->hash_values, 0, sizeof(struct hash_node) * hash_size); + for (int i = 0; i < graph->n_nodes; i++) { + struct ggml_tensor * node = graph->nodes[i]; + struct node_alloc * node_alloc = &galloc->node_allocs[i]; - galloc->talloc = talloc; - ggml_tallocr_alloc_graph_impl(galloc, graph); - galloc->talloc = NULL; + if (!ggml_gallocr_node_needs_realloc(galloc, node, node_alloc, &node_alloc->dst)) { +#ifndef NDEBUG + fprintf(stderr, "%s: node %s is not valid\n", __func__, node->name); +#endif + return true; + } - size_t max_size = ggml_tallocr_max_size(talloc); - - return max_size; -} - -void ggml_gallocr_alloc_graph_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, struct ggml_hash_set hash_set, ggml_tallocr_t * hash_node_talloc) { - const size_t hash_size = hash_set.size; - - GGML_ASSERT(hash_size >= (size_t)(graph->n_nodes + graph->n_leafs)); - - galloc->talloc = NULL; - - // alloc hash_values if needed - if (galloc->hash_values == NULL || galloc->hash_values_size < hash_size) { - free(galloc->hash_values); - galloc->hash_values = malloc(sizeof(struct hash_node) * hash_size); - galloc->hash_values_size = hash_size; + for (int j = 0; j < GGML_MAX_SRC; j++) { + struct ggml_tensor * src = node->src[j]; + if (src == NULL) { + break; + } + if (!ggml_gallocr_node_needs_realloc(galloc, src, node_alloc, &node_alloc->src[j])) { +#ifndef NDEBUG + fprintf(stderr, "%s: src %d (%s) of node %s is not valid\n", __func__, j, src->name, node->name); +#endif + return true; + } + } } - // free hash_set.keys if needed - if 
(galloc->hash_set.keys != NULL) { - free(galloc->hash_set.keys); - } - galloc->hash_set = hash_set; - - // reset hash values - memset(galloc->hash_values, 0, sizeof(struct hash_node) * hash_size); - - galloc->hash_allocs = hash_node_talloc; - - ggml_tallocr_alloc_graph_impl(galloc, graph); - - // remove unowned resources - galloc->hash_set.keys = NULL; - galloc->hash_allocs = NULL; + return false; } -// legacy API wrapper - -struct ggml_allocr { - ggml_tallocr_t talloc; - ggml_gallocr_t galloc; -}; - -static ggml_allocr_t ggml_allocr_new_impl(ggml_tallocr_t talloc) { - ggml_allocr_t alloc = (ggml_allocr_t)malloc(sizeof(struct ggml_allocr)); - *alloc = (struct ggml_allocr) { - /*.talloc = */ talloc, - /*.galloc = */ ggml_gallocr_new(), - }; - return alloc; -} - -ggml_allocr_t ggml_allocr_new(void * data, size_t size, size_t alignment) { - return ggml_allocr_new_impl(ggml_tallocr_new(data, size, alignment)); -} - -ggml_allocr_t ggml_allocr_new_measure(size_t alignment) { - return ggml_allocr_new_impl(ggml_tallocr_new_measure(alignment)); -} - -ggml_allocr_t ggml_allocr_new_from_buffer(struct ggml_backend_buffer * buffer) { - return ggml_allocr_new_impl(ggml_tallocr_new_from_buffer(buffer)); -} - -ggml_allocr_t ggml_allocr_new_from_backend(struct ggml_backend * backend, size_t size) { - return ggml_allocr_new_impl(ggml_tallocr_new_from_backend(backend, size)); -} - -ggml_allocr_t ggml_allocr_new_measure_from_backend(struct ggml_backend * backend) { - return ggml_allocr_new_impl(ggml_tallocr_new_measure_from_backend(backend)); -} - -struct ggml_backend_buffer * ggml_allocr_get_buffer(ggml_allocr_t alloc) { - return ggml_tallocr_get_buffer(alloc->talloc); -} - -void ggml_allocr_set_parse_seq(ggml_allocr_t alloc, const int * list, int n) { - ggml_gallocr_set_parse_seq(alloc->galloc, list, n); -} - -void ggml_allocr_free(ggml_allocr_t alloc) { - if (alloc == NULL) { - return; +bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph) { + if 
(ggml_gallocr_needs_realloc(galloc, graph)) { + if (galloc->n_buffers == 1) { +#ifndef NDEBUG + fprintf(stderr, "%s: reallocating buffers automatically\n", __func__); +#endif + if (!ggml_gallocr_reserve(galloc, graph)) { + return false; + } + } else { +#ifndef NDEBUG + fprintf(stderr, "%s: cannot reallocate multi buffer graph automatically, call reserve\n", __func__); +#endif + return false; + } } - ggml_gallocr_free(alloc->galloc); - ggml_tallocr_free(alloc->talloc); - free(alloc); + // reset buffers + for (int i = 0; i < galloc->n_buffers; i++) { + // zero size buffers are not allocated + if (galloc->buffers[i] != NULL) { + ggml_backend_buffer_reset(galloc->buffers[i]); + } + } + + // allocate the graph tensors from the previous assignments + for (int i = 0; i < graph->n_nodes; i++) { + struct ggml_tensor * node = graph->nodes[i]; + struct node_alloc * node_alloc = &galloc->node_allocs[i]; + for (int j = 0; j < GGML_MAX_SRC; j++) { + struct ggml_tensor * src = node->src[j]; + if (src == NULL) { + break; + } + ggml_gallocr_init_tensor(galloc, src, node_alloc, &node_alloc->src[j]); + } + ggml_gallocr_init_tensor(galloc, node, node_alloc, &node_alloc->dst); + } + + return true; } -bool ggml_allocr_is_measure(ggml_allocr_t alloc) { - return ggml_tallocr_is_measure(alloc->talloc); -} +size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) { + GGML_ASSERT(buffer_id >= 0 && buffer_id < galloc->n_buffers); -void ggml_allocr_reset(ggml_allocr_t alloc) { - ggml_tallocr_reset(alloc->talloc); -} - -void ggml_allocr_alloc(ggml_allocr_t alloc, struct ggml_tensor * tensor) { - ggml_tallocr_alloc(alloc->talloc, tensor); -} - -size_t ggml_allocr_max_size(ggml_allocr_t alloc) { - return ggml_tallocr_max_size(alloc->talloc); -} - -size_t ggml_allocr_alloc_graph(ggml_allocr_t alloc, struct ggml_cgraph * graph) { - return ggml_gallocr_alloc_graph(alloc->galloc, alloc->talloc, graph); + if (galloc->buffers[buffer_id] == NULL) { + return 0; + } + return 
ggml_backend_buffer_get_size(galloc->buffers[buffer_id]); } // utils @@ -795,17 +870,17 @@ static bool alloc_tensor_range(struct ggml_context * ctx, return false; } - ggml_tallocr_t tallocr = ggml_tallocr_new_from_buffer(buffer); + struct ggml_tallocr * tallocr = ggml_tallocr_new(buffer); for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) { if (t->data == NULL) { if (t->view_src == NULL) { ggml_tallocr_alloc(tallocr, t); - } else { + } else if (t->buffer == NULL) { ggml_backend_view_init(buffer, t); } } else { - if (t->view_src != NULL) { + if (t->view_src != NULL && t->buffer == NULL) { // view of a pre-allocated tensor ggml_backend_view_init(buffer, t); } @@ -838,7 +913,6 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte } if (this_size > max_size) { - // tensor is too large to fit in a single buffer fprintf(stderr, "%s: tensor %s is too large to fit in a %s buffer (tensor size: %zu, max buffer size: %zu)\n", __func__, t->name, ggml_backend_buft_name(buft), @@ -870,7 +944,6 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte } if (n_buffers == 0) { - // all the tensors in the context are already allocated #ifndef NDEBUG fprintf(stderr, "%s: all tensors in the context are already allocated\n", __func__); #endif diff --git a/ggml-alloc.h b/ggml-alloc.h index 4e5997521..1d9085d15 100644 --- a/ggml-alloc.h +++ b/ggml-alloc.h @@ -6,88 +6,62 @@ extern "C" { #endif -struct ggml_backend; -struct ggml_backend_buffer; -struct ggml_backend_buffer_type; - -// -// Legacy API -// - -typedef struct ggml_allocr * ggml_allocr_t; - -// initialize allocator for use with CPU backend only -GGML_API ggml_allocr_t ggml_allocr_new(void * data, size_t size, size_t alignment); -GGML_API ggml_allocr_t ggml_allocr_new_measure(size_t alignment); - -// initialize allocator for use with ggml-backend -GGML_API ggml_allocr_t ggml_allocr_new_from_buffer(struct ggml_backend_buffer * buffer); -GGML_API 
ggml_allocr_t ggml_allocr_new_from_backend(struct ggml_backend * backend, size_t size); // allocates an owned buffer -GGML_API ggml_allocr_t ggml_allocr_new_measure_from_backend(struct ggml_backend * backend); - -GGML_API struct ggml_backend_buffer * ggml_allocr_get_buffer(ggml_allocr_t alloc); - -// tell the allocator to parse nodes following the order described in the list -// you should call this if your graph are optimized to execute out-of-order -GGML_API void ggml_allocr_set_parse_seq(ggml_allocr_t alloc, const int * list, int n); - -GGML_API void ggml_allocr_free (ggml_allocr_t alloc); -GGML_API bool ggml_allocr_is_measure (ggml_allocr_t alloc); -GGML_API void ggml_allocr_reset (ggml_allocr_t alloc); -GGML_API void ggml_allocr_alloc (ggml_allocr_t alloc, struct ggml_tensor * tensor); -GGML_API size_t ggml_allocr_max_size (ggml_allocr_t alloc); - -GGML_API size_t ggml_allocr_alloc_graph(ggml_allocr_t alloc, struct ggml_cgraph * graph); - -// -// ggml-backend v2 API -// - -// Separate tensor and graph allocator objects -// This is necessary for multi-backend allocation because the graph allocator needs to use multiple tensor allocators -// The original API is kept as a wrapper around the new API +typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t; +typedef struct ggml_backend_buffer * ggml_backend_buffer_t; +typedef struct ggml_backend * ggml_backend_t; // Tensor allocator typedef struct ggml_tallocr * ggml_tallocr_t; -GGML_API ggml_tallocr_t ggml_tallocr_new(void * data, size_t size, size_t alignment); -GGML_API ggml_tallocr_t ggml_tallocr_new_measure(size_t alignment); -GGML_API ggml_tallocr_t ggml_tallocr_new_from_buft(struct ggml_backend_buffer_type * buft, size_t size); -GGML_API ggml_tallocr_t ggml_tallocr_new_from_backend(struct ggml_backend * backend, size_t size); // allocates an owned buffer -GGML_API ggml_tallocr_t ggml_tallocr_new_from_buffer(struct ggml_backend_buffer * buffer); -GGML_API ggml_tallocr_t 
ggml_tallocr_new_measure_from_buft(struct ggml_backend_buffer_type * buft); -GGML_API ggml_tallocr_t ggml_tallocr_new_measure_from_backend(struct ggml_backend * backend); - -GGML_API struct ggml_backend_buffer * ggml_tallocr_get_buffer(ggml_tallocr_t talloc); - -GGML_API void ggml_tallocr_free (ggml_tallocr_t talloc); -GGML_API bool ggml_tallocr_is_measure (ggml_tallocr_t talloc); -GGML_API void ggml_tallocr_reset (ggml_tallocr_t talloc); -GGML_API void ggml_tallocr_alloc (ggml_tallocr_t talloc, struct ggml_tensor * tensor); -GGML_API size_t ggml_tallocr_max_size (ggml_tallocr_t talloc); - +GGML_API ggml_tallocr_t ggml_tallocr_new(ggml_backend_buffer_t buffer); +GGML_API void ggml_tallocr_free(ggml_tallocr_t talloc); +GGML_API void ggml_tallocr_alloc(ggml_tallocr_t talloc, struct ggml_tensor * tensor); // Graph allocator +/* + Example usage: + ggml_gallocr_t galloc = ggml_gallocr_new(ggml_bacckend_cpu_buffer_type()); + + // optional: create a worst-case graph and reserve the buffers to avoid reallocations + ggml_gallocr_reserve(galloc, build_graph(max_batch)); + + // allocate the graph + struct ggml_cgraph * graph = build_graph(batch); + ggml_gallocr_alloc_graph(galloc, graph); + + printf("compute buffer size: %zu bytes\n", ggml_gallocr_get_buffer_size(galloc, 0)); + + // evaluate the graph + ggml_backend_graph_compute(backend, graph); +*/ + +// special tensor flags for use with the graph allocator: +// ggml_set_input(): all input tensors are allocated at the beginning of the graph in non-overlapping addresses +// ggml_set_output(): output tensors are never freed and never overwritten + typedef struct ggml_gallocr * ggml_gallocr_t; -GGML_API ggml_gallocr_t ggml_gallocr_new(void); -GGML_API void ggml_gallocr_free(ggml_gallocr_t galloc); +GGML_API ggml_gallocr_t ggml_gallocr_new(ggml_backend_buffer_type_t buft); +GGML_API ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs); +GGML_API void ggml_gallocr_free(ggml_gallocr_t galloc); 
-GGML_API void ggml_gallocr_set_parse_seq(ggml_gallocr_t galloc, const int * list, int n); -GGML_API size_t ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, ggml_tallocr_t talloc, struct ggml_cgraph * graph); +// pre-allocate buffers from a measure graph - does not allocate or modify the graph +// call with a worst-case graph to avoid buffer reallocations +// not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed +// returns false if the buffer allocation failed +GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph); +GGML_API bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids); -// Allocate tensors from the allocators given by the hash table -GGML_API void ggml_gallocr_alloc_graph_n( - ggml_gallocr_t galloc, - struct ggml_cgraph * graph, - struct ggml_hash_set hash_set, - ggml_tallocr_t * hash_node_talloc); +// automatic reallocation if the topology changes when using a single buffer +// returns false if using multiple buffers and a re-allocation is needed (call ggml_gallocr_reserve_n first to set the node buffers) +GGML_API bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph); +GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id); // Utils // Create a buffer and allocate all the tensors in a ggml_context -GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, struct ggml_backend_buffer_type * buft); -GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, struct ggml_backend * backend); +GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft); +GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend); #ifdef __cplusplus 
} diff --git a/ggml-backend.c b/ggml-backend.c index 532da8eda..9ee81b766 100644 --- a/ggml-backend.c +++ b/ggml-backend.c @@ -475,6 +475,8 @@ ggml_backend_buffer_t ggml_backend_reg_alloc_buffer(size_t i, size_t size) { // backend CPU +static const size_t TENSOR_ALIGNMENT = 32; // required for mmap as gguf only guarantees 32-byte alignment + GGML_CALL static const char * ggml_backend_cpu_buffer_name(ggml_backend_buffer_t buffer) { return "CPU"; @@ -482,7 +484,14 @@ GGML_CALL static const char * ggml_backend_cpu_buffer_name(ggml_backend_buffer_t } GGML_CALL static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) { - return (void *)buffer->context; + uintptr_t data = (uintptr_t)buffer->context; + + // align the buffer + if (data % TENSOR_ALIGNMENT != 0) { + data = GGML_PAD(data, TENSOR_ALIGNMENT); + } + + return (void *)data; } GGML_CALL static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) { @@ -540,8 +549,6 @@ static struct ggml_backend_buffer_i cpu_backend_buffer_i_from_ptr = { /* .reset = */ NULL, }; -static const size_t TENSOR_ALIGNMENT = 64; // should be enough for AVX 512 - GGML_CALL static const char * ggml_backend_cpu_buffer_type_get_name(ggml_backend_buffer_type_t buft) { return "CPU"; @@ -550,9 +557,11 @@ GGML_CALL static const char * ggml_backend_cpu_buffer_type_get_name(ggml_backend GGML_CALL static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) { size += TENSOR_ALIGNMENT; // malloc may return an address that is not aligned - void * data = malloc(size); // TODO: maybe use GGML_ALIGNED_MALLOC? 
- - GGML_ASSERT(data != NULL && "failed to allocate buffer"); + void * data = malloc(size); // TODO: use GGML_ALIGNED_MALLOC (move to ggml-impl.h) + if (data == NULL) { + fprintf(stderr, "%s: failed to allocate buffer of size %zu\n", __func__, size); + return NULL; + } return ggml_backend_buffer_init(buft, cpu_backend_buffer_i, data, size); } @@ -766,6 +775,9 @@ static struct ggml_backend_i cpu_backend_i = { ggml_backend_t ggml_backend_cpu_init(void) { struct ggml_backend_cpu_context * ctx = malloc(sizeof(struct ggml_backend_cpu_context)); + if (ctx == NULL) { + return NULL; + } ctx->n_threads = GGML_DEFAULT_N_THREADS; ctx->work_data = NULL; @@ -774,6 +786,10 @@ ggml_backend_t ggml_backend_cpu_init(void) { ctx->abort_callback_data = NULL; ggml_backend_t cpu_backend = malloc(sizeof(struct ggml_backend)); + if (cpu_backend == NULL) { + free(ctx); + return NULL; + } *cpu_backend = (struct ggml_backend) { /* .interface = */ cpu_backend_i, @@ -802,6 +818,7 @@ void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_ } GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size) { + GGML_ASSERT((uintptr_t)ptr % TENSOR_ALIGNMENT == 0 && "buffer pointer must be aligned"); return ggml_backend_buffer_init(ggml_backend_cpu_buffer_type(), cpu_backend_buffer_i_from_ptr, ptr, size); } @@ -865,6 +882,8 @@ GGML_CALL ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_back ctx->n_buffers = n_buffers; ctx->buffers = (ggml_backend_buffer_t *) malloc(n_buffers * sizeof(ggml_backend_buffer_t)); + GGML_ASSERT(ctx->buffers != NULL); + size_t total_size = 0; for (size_t i = 0; i < n_buffers; i++) { ctx->buffers[i] = buffers[i]; @@ -886,6 +905,18 @@ GGML_CALL void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, } } +// creates a copy of the tensor with the same memory layout +static struct ggml_tensor * ggml_dup_tensor_layout(struct ggml_context * ctx, const struct ggml_tensor * tensor) { + struct 
ggml_tensor * dup = ggml_dup_tensor(ctx, tensor); + for (int i = 0; i < GGML_MAX_DIMS; i++) { + dup->nb[i] = tensor->nb[i]; + } + return dup; +} + +static bool ggml_is_view_op(enum ggml_op op) { + return op == GGML_OP_VIEW || op == GGML_OP_RESHAPE || op == GGML_OP_PERMUTE || op == GGML_OP_TRANSPOSE; +} // scheduler @@ -894,7 +925,7 @@ GGML_CALL void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, #define GGML_MAX_SPLIT_INPUTS 16 struct ggml_backend_sched_split { - ggml_tallocr_t tallocr; + int backend_id; int i_start; int i_end; struct ggml_tensor * inputs[GGML_MAX_SPLIT_INPUTS]; @@ -909,15 +940,17 @@ struct ggml_backend_sched { int n_backends; ggml_backend_t backends[GGML_MAX_BACKENDS]; ggml_backend_buffer_type_t bufts[GGML_MAX_BACKENDS]; - ggml_tallocr_t tallocs[GGML_MAX_BACKENDS]; ggml_gallocr_t galloc; // hash keys of the nodes in the graph struct ggml_hash_set hash_set; - // hash values (arrays of [hash_set.size]) - ggml_tallocr_t * node_talloc; // tallocr assigned to each node (indirectly this is the backend) - struct ggml_tensor * (* node_copies)[GGML_MAX_BACKENDS]; // copies of each node for each destination backend + // hash values + int * tensor_backend_id; + struct ggml_tensor * (* tensor_copies)[GGML_MAX_BACKENDS]; + + int * node_backend_ids; // [n_nodes] + int n_nodes; // copy of the graph with modified inputs struct ggml_cgraph * graph; @@ -927,77 +960,46 @@ struct ggml_backend_sched { struct ggml_context * ctx; + ggml_backend_sched_eval_callback callback_eval; + void * callback_eval_user_data; + // align context_buffer to GGML_MEM_ALIGN #ifdef _MSC_VER __declspec(align(GGML_MEM_ALIGN)) #else __attribute__((aligned(GGML_MEM_ALIGN))) #endif - char context_buffer[GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS*sizeof(struct ggml_tensor) + sizeof(struct ggml_cgraph)]; - - ggml_backend_sched_eval_callback callback_eval; - void * callback_eval_user_data; + char context_buffer[GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + 
sizeof(struct ggml_cgraph)]; }; #define hash_id(node) ggml_hash_find_or_insert(sched->hash_set, node) -#define node_allocr(node) sched->node_talloc[hash_id(node)] +#define tensor_backend_id(node) sched->tensor_backend_id[hash_id(node)] +#define tensor_backend(node) (tensor_backend_id(node) == -1 ? NULL : sched->backends[tensor_backend_id(node)]) -static bool ggml_is_view_op(enum ggml_op op) { - return op == GGML_OP_VIEW || op == GGML_OP_RESHAPE || op == GGML_OP_PERMUTE || op == GGML_OP_TRANSPOSE; -} - -// returns the priority of the backend, lower is better -static int sched_backend_prio(ggml_backend_sched_t sched, ggml_backend_t backend) { +// returns the priority of the backend, lower id is higher priority +static int ggml_backend_sched_backend_id(ggml_backend_sched_t sched, ggml_backend_t backend) { for (int i = 0; i < sched->n_backends; i++) { if (sched->backends[i] == backend) { return i; } } - return INT_MAX; + return -1; } -static int sched_allocr_prio(ggml_backend_sched_t sched, ggml_tallocr_t allocr) { - for (int i = 0; i < sched->n_backends; i++) { - if (sched->tallocs[i] == allocr) { - return i; - } - } - return INT_MAX; -} - -static ggml_tallocr_t sched_allocr_from_buffer(ggml_backend_sched_t sched, ggml_backend_buffer_t buffer) { +static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, ggml_backend_buffer_t buffer) { if (buffer == NULL) { - return NULL; - } - - // check if this is already allocate in a allocr buffer (from user manual allocations) - for (int i = 0; i < sched->n_backends; i++) { - if (ggml_tallocr_get_buffer(sched->tallocs[i]) == buffer) { - return sched->tallocs[i]; - } + return -1; } // find highest prio backend that supports the buffer type for (int i = 0; i < sched->n_backends; i++) { if (ggml_backend_buft_supports_backend(buffer->buft, sched->backends[i])) { - return sched->tallocs[i]; + return i; } } GGML_ASSERT(false && "tensor buffer type not supported by any backend"); } -static ggml_backend_t 
get_allocr_backend(ggml_backend_sched_t sched, ggml_tallocr_t allocr) { - if (allocr == NULL) { - return NULL; - } - for (int i = 0; i < sched->n_backends; i++) { - if (sched->tallocs[i] == allocr) { - return sched->backends[i]; - } - } - GGML_UNREACHABLE(); -} - #if 0 static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS][128]; // debug only #define SET_CAUSE(node, ...) sprintf(causes[hash_id(node)], __VA_ARGS__) @@ -1008,37 +1010,39 @@ static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_MAX_SPLITS*GGML_MAX_SPLIT_I #endif // returns the backend that should be used for the node based on the current locations -static ggml_tallocr_t sched_allocr_from_cur(ggml_backend_sched_t sched, struct ggml_tensor * node) { +static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, struct ggml_tensor * tensor) { + // TODO: use supports_op to check if the backend supports the op + // assign pre-allocated nodes to their backend // dst - ggml_tallocr_t cur_allocr = sched_allocr_from_buffer(sched, node->buffer); - if (cur_allocr != NULL) { + int cur_backend = ggml_backend_sched_backend_from_buffer(sched, tensor->buffer); + if (cur_backend != -1) { SET_CAUSE(node, "1.dst"); - return cur_allocr; + return cur_backend; } // view_src - if (node->view_src != NULL) { - cur_allocr = sched_allocr_from_buffer(sched, node->view_src->buffer); - if (cur_allocr != NULL) { + if (tensor->view_src != NULL) { + cur_backend = ggml_backend_sched_backend_from_buffer(sched, tensor->view_src->buffer); + if (cur_backend != -1) { SET_CAUSE(node, "1.vsrc"); - return cur_allocr; + return cur_backend; } } // assign nodes that use weights to the backend of the weights for (int i = 0; i < GGML_MAX_SRC; i++) { - const struct ggml_tensor * src = node->src[i]; + const struct ggml_tensor * src = tensor->src[i]; if (src == NULL) { break; } if (src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) { - ggml_tallocr_t src_allocr = 
sched_allocr_from_buffer(sched, src->buffer); + int src_backend = ggml_backend_sched_backend_from_buffer(sched, src->buffer); // operations with weights are always run on the same backend as the weights SET_CAUSE(node, "1.wgt%d", i); - return src_allocr; + return src_backend; } } - return NULL; + return -1; } static char * fmt_size(size_t size) { @@ -1051,11 +1055,11 @@ static char * fmt_size(size_t size) { return buffer; } -static void sched_print_assignments(ggml_backend_sched_t sched, struct ggml_cgraph * graph) { +static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, struct ggml_cgraph * graph) { int cur_split = 0; for (int i = 0; i < graph->n_nodes; i++) { if (cur_split < sched->n_splits && i == sched->splits[cur_split].i_start) { - ggml_backend_t split_backend = get_allocr_backend(sched, sched->splits[cur_split].tallocr); + ggml_backend_t split_backend = sched->backends[sched->splits[cur_split].backend_id]; fprintf(stderr, "\n## SPLIT #%d: %s # %d inputs: ", cur_split, ggml_backend_name(split_backend), sched->splits[cur_split].n_inputs); for (int j = 0; j < sched->splits[cur_split].n_inputs; j++) { @@ -1069,17 +1073,15 @@ static void sched_print_assignments(ggml_backend_sched_t sched, struct ggml_cgra if (ggml_is_view_op(node->op)) { continue; } - ggml_tallocr_t node_allocr = node_allocr(node); - ggml_backend_t node_backend = node_allocr ? get_allocr_backend(sched, node_allocr) : NULL; // FIXME: + ggml_backend_t tensor_backend = tensor_backend(node); fprintf(stderr, "node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s]:", i, ggml_op_name(node->op), node->name, - fmt_size(ggml_nbytes(node)), node_allocr ? ggml_backend_name(node_backend) : "NULL", GET_CAUSE(node)); + fmt_size(ggml_nbytes(node)), tensor_backend ? 
ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node)); for (int j = 0; j < GGML_MAX_SRC; j++) { struct ggml_tensor * src = node->src[j]; if (src == NULL) { break; } - ggml_tallocr_t src_allocr = node_allocr(src); - ggml_backend_t src_backend = src_allocr ? get_allocr_backend(sched, src_allocr) : NULL; + ggml_backend_t src_backend = tensor_backend(src); fprintf(stderr, " %20.20s (%5.5s) [%5.5s %8.8s]", src->name, fmt_size(ggml_nbytes(src)), src_backend ? ggml_backend_name(src_backend) : "NULL", GET_CAUSE(src)); } @@ -1087,23 +1089,13 @@ static void sched_print_assignments(ggml_backend_sched_t sched, struct ggml_cgra } } -// creates a copy of the tensor with the same memory layout -static struct ggml_tensor * ggml_dup_tensor_layout(struct ggml_context * ctx, const struct ggml_tensor * tensor) { - struct ggml_tensor * dup = ggml_dup_tensor(ctx, tensor); - for (int i = 0; i < GGML_MAX_DIMS; i++) { - dup->nb[i] = tensor->nb[i]; - } - return dup; -} - - //#define DEBUG_PASS1 //#define DEBUG_PASS2 //#define DEBUG_PASS3 //#define DEBUG_PASS4 // assigns backends to ops and splits the graph into subgraphs that can be computed on the same backend -static void sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) { +static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) { // reset splits sched->n_splits = 0; sched->is_reset = false; @@ -1125,28 +1117,28 @@ static void sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * g // pass 1: assign backends to ops with pre-allocated inputs for (int i = 0; i < graph->n_leafs; i++) { struct ggml_tensor * leaf = graph->leafs[i]; - if (node_allocr(leaf) != NULL) { + if (tensor_backend_id(leaf) != -1) { // do not overwrite user assignments continue; } - node_allocr(leaf) = sched_allocr_from_cur(sched, leaf); + tensor_backend_id(leaf) = ggml_backend_sched_backend_id_from_cur(sched, leaf); } for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node 
= graph->nodes[i]; - if (node_allocr(node) != NULL) { + if (tensor_backend_id(node) != -1) { // do not overwrite user assignments continue; } - node_allocr(node) = sched_allocr_from_cur(sched, node); + tensor_backend_id(node) = ggml_backend_sched_backend_id_from_cur(sched, node); // src for (int j = 0; j < GGML_MAX_SRC; j++) { struct ggml_tensor * src = node->src[j]; if (src == NULL) { break; } - if (node_allocr(src) == NULL) { - node_allocr(src) = sched_allocr_from_cur(sched, src); + if (tensor_backend_id(src) == -1) { + tensor_backend_id(src) = ggml_backend_sched_backend_id_from_cur(sched, src); } } } @@ -1161,22 +1153,22 @@ static void sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * g // pass 2.1 expand gpu up { - ggml_tallocr_t cur_allocr = NULL; + int cur_backend_id = -1; for (int i = graph->n_nodes - 1; i >= 0; i--) { struct ggml_tensor * node = graph->nodes[i]; if (ggml_is_view_op(node->op)) { continue; } - ggml_tallocr_t node_allocr = node_allocr(node); - if (node_allocr != NULL) { - if (sched_allocr_prio(sched, node_allocr) == sched->n_backends - 1) { + int tensor_backend_id = tensor_backend_id(node); + if (tensor_backend_id != -1) { + if (tensor_backend_id == sched->n_backends - 1) { // skip cpu (lowest prio backend) - cur_allocr = NULL; + cur_backend_id = -1; } else { - cur_allocr = node_allocr; + cur_backend_id = tensor_backend_id; } } else { - node_allocr(node) = cur_allocr; + tensor_backend_id(node) = cur_backend_id; SET_CAUSE(node, "2.1"); } } @@ -1184,22 +1176,22 @@ static void sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * g // pass 2.2 expand gpu down { - ggml_tallocr_t cur_allocr = NULL; + int cur_backend_id = -1; for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; if (ggml_is_view_op(node->op)) { continue; } - ggml_tallocr_t node_allocr = node_allocr(node); - if (node_allocr != NULL) { - if (sched_allocr_prio(sched, node_allocr) == sched->n_backends - 1) { + int 
tensor_backend_id = tensor_backend_id(node); + if (tensor_backend_id != -1) { + if (tensor_backend_id == sched->n_backends - 1) { // skip cpu (lowest prio backend) - cur_allocr = NULL; + cur_backend_id = -1; } else { - cur_allocr = node_allocr; + cur_backend_id = tensor_backend_id; } } else { - node_allocr(node) = cur_allocr; + tensor_backend_id(node) = cur_backend_id; SET_CAUSE(node, "2.2"); } } @@ -1207,17 +1199,17 @@ static void sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * g // pass 2.3 expand rest up { - ggml_tallocr_t cur_allocr = NULL; + int cur_backend_id = -1; for (int i = graph->n_nodes - 1; i >= 0; i--) { struct ggml_tensor * node = graph->nodes[i]; if (ggml_is_view_op(node->op)) { continue; } - ggml_tallocr_t node_allocr = node_allocr(node); - if (node_allocr != NULL) { - cur_allocr = node_allocr; + int tensor_backend_id = tensor_backend_id(node); + if (tensor_backend_id != -1) { + cur_backend_id = tensor_backend_id; } else { - node_allocr(node) = cur_allocr; + tensor_backend_id(node) = cur_backend_id; SET_CAUSE(node, "2.3"); } } @@ -1225,17 +1217,17 @@ static void sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * g // pass 2.4 expand rest down { - ggml_tallocr_t cur_allocr = NULL; + int cur_backend_id = -1; for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; if (ggml_is_view_op(node->op)) { continue; } - ggml_tallocr_t node_allocr = node_allocr(node); - if (node_allocr != NULL) { - cur_allocr = node_allocr; + int tensor_backend_id = tensor_backend_id(node); + if (tensor_backend_id != -1) { + cur_backend_id = tensor_backend_id; } else { - node_allocr(node) = cur_allocr; + tensor_backend_id(node) = cur_backend_id; SET_CAUSE(node, "2.4"); } } @@ -1247,9 +1239,9 @@ static void sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * g // pass 3: assign backends to remaining src from dst and view_src for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = 
graph->nodes[i]; - ggml_tallocr_t cur_allocr = node_allocr(node); - if (node->view_src != NULL && cur_allocr == NULL) { - cur_allocr = node_allocr(node) = node_allocr(node->view_src); + int cur_backend_id = tensor_backend_id(node); + if (node->view_src != NULL && cur_backend_id == -1) { + cur_backend_id = tensor_backend_id(node) = tensor_backend_id(node->view_src); SET_CAUSE(node, "3.vsrc"); } for (int j = 0; j < GGML_MAX_SRC; j++) { @@ -1257,14 +1249,14 @@ static void sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * g if (src == NULL) { break; } - ggml_tallocr_t src_allocr = node_allocr(src); - if (src_allocr == NULL) { + int src_backend_id = tensor_backend_id(src); + if (src_backend_id == -1) { if (src->view_src != NULL) { // views are always on the same backend as the source - node_allocr(src) = node_allocr(src->view_src); + tensor_backend_id(src) = tensor_backend_id(src->view_src); SET_CAUSE(src, "3.vsrc"); } else { - node_allocr(src) = cur_allocr; + tensor_backend_id(src) = cur_backend_id; SET_CAUSE(src, "3.cur"); } } @@ -1281,15 +1273,14 @@ static void sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * g for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; if (!ggml_is_view_op(node->op)) { - sched->splits[0].tallocr = node_allocr(node); + sched->splits[0].backend_id = tensor_backend_id(node); break; } } sched->splits[0].i_start = 0; sched->splits[0].n_inputs = 0; memset(sched->splits[0].inputs, 0, sizeof(sched->splits[0].inputs)); //HACK - ggml_tallocr_t cur_allocr = sched->splits[0].tallocr; - size_t cur_backend_id = sched_allocr_prio(sched, cur_allocr); + int cur_backend_id = sched->splits[0].backend_id; for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; @@ -1297,19 +1288,18 @@ static void sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * g continue; } - ggml_tallocr_t node_allocr = node_allocr(node); + int tensor_backend_id = 
tensor_backend_id(node); - GGML_ASSERT(node_allocr != NULL); // all nodes should be assigned by now + GGML_ASSERT(tensor_backend_id != -1); // all nodes should be assigned by now - if (node_allocr != cur_allocr) { + if (tensor_backend_id != cur_backend_id) { sched->splits[cur_split].i_end = i; cur_split++; GGML_ASSERT(cur_split < GGML_MAX_SPLITS); - sched->splits[cur_split].tallocr = node_allocr; + sched->splits[cur_split].backend_id = tensor_backend_id; sched->splits[cur_split].i_start = i; sched->splits[cur_split].n_inputs = 0; - cur_allocr = node_allocr; - cur_backend_id = sched_allocr_prio(sched, cur_allocr); + cur_backend_id = tensor_backend_id; } // find inputs that are not on the same backend @@ -1318,43 +1308,25 @@ static void sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * g if (src == NULL) { break; } - ggml_tallocr_t src_allocr = node_allocr(src); - GGML_ASSERT(src_allocr != NULL); // all inputs should be assigned by now - if (src_allocr != node_allocr) { + int src_backend_id = tensor_backend_id(src); + assert(src_backend_id != -1); // all inputs should be assigned by now + if (src_backend_id != tensor_backend_id) { // create a copy of the input in the split's backend size_t id = hash_id(src); - if (sched->node_copies[id][cur_backend_id] == NULL) { - ggml_backend_t backend = get_allocr_backend(sched, cur_allocr); + if (sched->tensor_copies[id][cur_backend_id] == NULL) { + ggml_backend_t backend = sched->backends[cur_backend_id]; struct ggml_tensor * tensor_copy = ggml_dup_tensor_layout(sched->ctx, src); ggml_format_name(tensor_copy, "%s#%s", ggml_backend_name(backend), src->name); - sched->node_copies[id][cur_backend_id] = tensor_copy; - node_allocr(tensor_copy) = cur_allocr; + sched->tensor_copies[id][cur_backend_id] = tensor_copy; + tensor_backend_id(tensor_copy) = cur_backend_id; SET_CAUSE(tensor_copy, "4.cpy"); int n_inputs = sched->splits[cur_split].n_inputs++; GGML_ASSERT(n_inputs < GGML_MAX_SPLIT_INPUTS); 
sched->splits[cur_split].inputs[n_inputs] = src; } - node->src[j] = sched->node_copies[id][cur_backend_id]; - -#if 0 - // check if the input is already in the split - bool found = false; - for (int k = 0; k < sched->splits[cur_split].n_inputs; k++) { - if (sched->splits[cur_split].inputs[k] == src) { - found = true; - break; - } - } - - if (!found) { - int n_inputs = sched->splits[cur_split].n_inputs++; - //printf("split %d input %d: %s (%s)\n", cur_split, n_inputs, src->name, ggml_backend_name(get_allocr_backend(sched, src_allocr))); - GGML_ASSERT(n_inputs < GGML_MAX_SPLIT_INPUTS); - sched->splits[cur_split].inputs[n_inputs] = src; - } -#endif + node->src[j] = sched->tensor_copies[id][cur_backend_id]; } } } @@ -1369,30 +1341,30 @@ static void sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * g // sanity check: all sources should have the same backend as the node for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; - ggml_tallocr_t node_allocr = node_allocr(node); - if (node_allocr == NULL) { + ggml_backend_t tensor_backend = tensor_backend(node); + if (tensor_backend == NULL) { fprintf(stderr, "!!!!!!! %s has no backend\n", node->name); } - if (node->view_src != NULL && node_allocr != node_allocr(node->view_src)) { + if (node->view_src != NULL && tensor_backend != tensor_backend(node->view_src)) { fprintf(stderr, "!!!!!!! %s has backend %s, view_src %s has backend %s\n", - node->name, node_allocr ? ggml_backend_name(get_allocr_backend(sched, node_allocr)) : "NULL", - node->view_src->name, node_allocr(node->view_src) ? ggml_backend_name(get_allocr_backend(sched, node_allocr(node->view_src))) : "NULL"); + node->name, tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", + node->view_src->name, tensor_backend(node->view_src) ? 
ggml_backend_name(tensor_backend(node->view_src)) : "NULL"); } for (int j = 0; j < GGML_MAX_SRC; j++) { struct ggml_tensor * src = node->src[j]; if (src == NULL) { break; } - ggml_tallocr_t src_allocr = node_allocr(src); - if (src_allocr != node_allocr /* && src_backend != NULL */) { // ignore nulls for now + ggml_backend_t src_backend = tensor_backend(src); + if (src_backend != tensor_backend /* && src_backend != NULL */) { fprintf(stderr, "!!!! %s has backend %s, src %d (%s) has backend %s\n", - node->name, node_allocr ? ggml_backend_name(get_allocr_backend(sched, node_allocr)) : "NULL", - j, src->name, src_allocr ? ggml_backend_name(get_allocr_backend(sched, src_allocr)) : "NULL"); + node->name, tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", + j, src->name, src_backend ? ggml_backend_name(src_backend) : "NULL"); } - if (src->view_src != NULL && src_allocr != node_allocr(src->view_src)) { + if (src->view_src != NULL && src_backend != tensor_backend(src->view_src)) { fprintf(stderr, "!!!!!!! [src] %s has backend %s, view_src %s has backend %s\n", - src->name, src_allocr ? ggml_backend_name(get_allocr_backend(sched, src_allocr)) : "NULL", - src->view_src->name, node_allocr(src->view_src) ? ggml_backend_name(get_allocr_backend(sched, node_allocr(src->view_src))) : "NULL"); + src->name, src_backend ? ggml_backend_name(src_backend) : "NULL", + src->view_src->name, tensor_backend(src->view_src) ? 
ggml_backend_name(tensor_backend(src->view_src)) : "NULL"); } } } @@ -1406,32 +1378,45 @@ static void sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * g struct ggml_backend_sched_split * split = &sched->splits[i]; split->graph = ggml_graph_view(graph, split->i_start, split->i_end); - // add inputs to the graph copy so that they are allocated by ggml-alloc at the start of the split for (int j = 0; j < split->n_inputs; j++) { struct ggml_tensor * input = split->inputs[j]; - struct ggml_tensor * input_cpy = sched->node_copies[hash_id(input)][sched_allocr_prio(sched, split->tallocr)]; + struct ggml_tensor * input_cpy = sched->tensor_copies[hash_id(input)][split->backend_id]; + // add a dependency to the input source so that it is not freed before the copy is done - GGML_ASSERT(input_cpy->src[0] == NULL || input_cpy->src[0] == input); - input_cpy->src[0] = input; + struct ggml_tensor * input_dep = ggml_view_tensor(sched->ctx, input); + sched->node_backend_ids[graph_copy->n_nodes] = tensor_backend_id(input); + graph_copy->nodes[graph_copy->n_nodes++] = input_dep; + + // add a dependency to the input copy so that it is allocated at the start of the split + sched->node_backend_ids[graph_copy->n_nodes] = split->backend_id; graph_copy->nodes[graph_copy->n_nodes++] = input_cpy; } for (int j = split->i_start; j < split->i_end; j++) { + sched->node_backend_ids[graph_copy->n_nodes] = tensor_backend_id(graph->nodes[j]); graph_copy->nodes[graph_copy->n_nodes++] = graph->nodes[j]; } } sched->graph = graph_copy; } -static void sched_alloc_splits(ggml_backend_sched_t sched) { - ggml_gallocr_alloc_graph_n( - sched->galloc, - sched->graph, - sched->hash_set, - sched->node_talloc); +static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) { + // ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids); + if (!ggml_gallocr_alloc_graph(sched->galloc, sched->graph)) { +#ifndef NDEBUG + fprintf(stderr, "ggml_backend_sched: failed to 
allocate graph, reserving\n"); +#endif + ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids); + if (!ggml_gallocr_alloc_graph(sched->galloc, sched->graph)) { + fprintf(stderr, "ggml_backend_sched: failed to allocate graph\n"); + return false; + } + } + + return true; } -static void sched_compute_splits(ggml_backend_sched_t sched) { +static bool ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) { uint64_t copy_us[GGML_MAX_BACKENDS] = {0}; uint64_t compute_us[GGML_MAX_BACKENDS] = {0}; @@ -1439,20 +1424,18 @@ static void sched_compute_splits(ggml_backend_sched_t sched) { for (int i = 0; i < sched->n_splits; i++) { struct ggml_backend_sched_split * split = &splits[i]; - ggml_backend_t split_backend = get_allocr_backend(sched, split->tallocr); - int split_backend_id = sched_backend_prio(sched, split_backend); + int split_backend_id = split->backend_id; + ggml_backend_t split_backend = sched->backends[split_backend_id]; // copy the input tensors to the split backend uint64_t copy_start_us = ggml_time_us(); for (int j = 0; j < split->n_inputs; j++) { struct ggml_tensor * input = split->inputs[j]; - struct ggml_tensor * input_cpy = sched->node_copies[hash_id(input)][split_backend_id]; + struct ggml_tensor * input_cpy = sched->tensor_copies[hash_id(input)][split_backend_id]; GGML_ASSERT(input->buffer != NULL); GGML_ASSERT(input_cpy->buffer != NULL); - // TODO: avoid this copy if it was already copied in a previous split, and the input didn't change - // this is important to avoid copying constants such as KQ_mask and inp_pos multiple times ggml_backend_tensor_copy_async(split_backend, input, input_cpy); } //ggml_backend_synchronize(split_backend); // necessary to measure copy time @@ -1468,7 +1451,9 @@ static void sched_compute_splits(ggml_backend_sched_t sched) { uint64_t compute_start_us = ggml_time_us(); if (!sched->callback_eval) { - ggml_backend_graph_compute(split_backend, &split->graph); + if 
(!ggml_backend_graph_compute(split_backend, &split->graph)) { + return false; + } //ggml_backend_synchronize(split_backend); // necessary to measure compute time } else { // similar to ggml_backend_compare_graph_backend @@ -1488,7 +1473,9 @@ static void sched_compute_splits(ggml_backend_sched_t sched) { struct ggml_cgraph gv = ggml_graph_view(&split->graph, j0, j1 + 1); - ggml_backend_graph_compute(split_backend, &gv); + if (!ggml_backend_graph_compute(split_backend, &gv)) { + return false; + } if (need && !sched->callback_eval(t, false, sched->callback_eval_user_data)) { break; @@ -1510,19 +1497,8 @@ static void sched_compute_splits(ggml_backend_sched_t sched) { } } #endif -} -static void sched_reset(ggml_backend_sched_t sched) { - for (int i = 0; i < sched->n_backends; i++) { - ggml_tallocr_reset(sched->tallocs[i]); - } - // reset state for the next run - size_t hash_size = sched->hash_set.size; - memset(sched->hash_set.keys, 0, sizeof(sched->hash_set.keys[0]) * hash_size); - memset(sched->node_talloc, 0, sizeof(sched->node_talloc[0]) * hash_size); - memset(sched->node_copies, 0, sizeof(sched->node_copies[0]) * hash_size); - - sched->is_reset = true; + return true; } ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size) { @@ -1532,9 +1508,10 @@ ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_back struct ggml_backend_sched * sched = calloc(sizeof(struct ggml_backend_sched), 1); // initialize hash table - sched->hash_set = ggml_hash_set_new(graph_size + GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS); - sched->node_talloc = calloc(sizeof(sched->node_talloc[0]) * sched->hash_set.size, 1); - sched->node_copies = calloc(sizeof(sched->node_copies[0]) * sched->hash_set.size, 1); + sched->hash_set = ggml_hash_set_new(graph_size + GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS); + sched->tensor_backend_id = calloc(sizeof(sched->tensor_backend_id[0]), sched->hash_set.size); + 
sched->tensor_copies = calloc(sizeof(sched->tensor_copies[0]), sched->hash_set.size); + sched->node_backend_ids = calloc(sizeof(sched->node_backend_ids[0]), graph_size); sched->n_backends = n_backends; for (int i = 0; i < n_backends; i++) { @@ -1542,14 +1519,9 @@ ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_back sched->bufts[i] = bufts ? bufts[i] : ggml_backend_get_default_buffer_type(backends[i]); } - sched->galloc = ggml_gallocr_new(); + sched->galloc = ggml_gallocr_new_n(sched->bufts, n_backends); - // init measure allocs for each backend - for (int i = 0; i < n_backends; i++) { - sched->tallocs[i] = ggml_tallocr_new_measure_from_buft(sched->bufts[i]); - } - - sched_reset(sched); + ggml_backend_sched_reset(sched); return sched; } @@ -1558,49 +1530,54 @@ void ggml_backend_sched_free(ggml_backend_sched_t sched) { if (sched == NULL) { return; } - for (int i = 0; i < sched->n_backends; i++) { - ggml_tallocr_free(sched->tallocs[i]); - } ggml_gallocr_free(sched->galloc); ggml_free(sched->ctx); free(sched->hash_set.keys); - free(sched->node_talloc); - free(sched->node_copies); + free(sched->tensor_backend_id); + free(sched->tensor_copies); + free(sched->node_backend_ids); free(sched); } -void ggml_backend_sched_init_measure(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) { - GGML_ASSERT(ggml_tallocr_is_measure(sched->tallocs[0])); // can only be initialized once +void ggml_backend_sched_reset(ggml_backend_sched_t sched) { + // reset state for the next run + size_t hash_size = sched->hash_set.size; + memset(sched->hash_set.keys, 0, sizeof(sched->hash_set.keys[0]) * hash_size); // NOLINT + memset(sched->tensor_backend_id, -1, sizeof(sched->tensor_backend_id[0]) * hash_size); + memset(sched->tensor_copies, 0, sizeof(sched->tensor_copies[0]) * hash_size); - sched_split_graph(sched, measure_graph); - sched_alloc_splits(sched); - - // allocate buffers and reset allocators - for (int i = 0; i < sched->n_backends; i++) { - size_t 
size = ggml_tallocr_max_size(sched->tallocs[i]); - ggml_tallocr_free(sched->tallocs[i]); - sched->tallocs[i] = ggml_tallocr_new_from_buft(sched->bufts[i], size); - } - - sched_reset(sched); + sched->is_reset = true; } -void ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph) { +bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) { + ggml_backend_sched_split_graph(sched, measure_graph); + + if (!ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids)) { + return false; + } + + ggml_backend_sched_reset(sched); + return true; +} + +bool ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph) { GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS); if (!sched->is_reset) { - sched_reset(sched); + ggml_backend_sched_reset(sched); } - sched_split_graph(sched, graph); - sched_alloc_splits(sched); - sched_compute_splits(sched); -} + ggml_backend_sched_split_graph(sched, graph); + if (!ggml_backend_sched_alloc_splits(sched)) { + return false; + } -void ggml_backend_sched_reset(ggml_backend_sched_t sched) { - sched_reset(sched); -} + if (!ggml_backend_sched_compute_splits(sched)) { + return false; + } + return true; +} void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data) { sched->callback_eval = callback; @@ -1611,37 +1588,30 @@ int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched) { return sched->n_splits; } -ggml_tallocr_t ggml_backend_sched_get_tallocr(ggml_backend_sched_t sched, ggml_backend_t backend) { - int backend_index = sched_backend_prio(sched, backend); +size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend) { + int backend_index = ggml_backend_sched_backend_id(sched, backend); GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends); - return 
sched->tallocs[backend_index]; -} - -ggml_backend_buffer_t ggml_backend_sched_get_buffer(ggml_backend_sched_t sched, ggml_backend_t backend) { - int backend_index = sched_backend_prio(sched, backend); - GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends); - return ggml_tallocr_get_buffer(sched->tallocs[backend_index]); + return ggml_gallocr_get_buffer_size(sched->galloc, backend_index); } void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend) { - int backend_index = sched_backend_prio(sched, backend); + int backend_index = ggml_backend_sched_backend_id(sched, backend); GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends); - node_allocr(node) = sched->tallocs[backend_index]; + tensor_backend_id(node) = backend_index; } ggml_backend_t ggml_backend_sched_get_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node) { - ggml_tallocr_t allocr = node_allocr(node); - if (allocr == NULL) { + int backend_index = tensor_backend_id(node); + if (backend_index == -1) { return NULL; } - return get_allocr_backend(sched, allocr); + return sched->backends[backend_index]; } // utils void ggml_backend_view_init(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) { GGML_ASSERT(tensor->buffer == NULL); - //GGML_ASSERT(tensor->data == NULL); // views of pre-allocated tensors may have the data set in ggml_new_tensor, but still need to be initialized by the backend GGML_ASSERT(tensor->view_src != NULL); GGML_ASSERT(tensor->view_src->buffer != NULL); GGML_ASSERT(tensor->view_src->data != NULL); @@ -1665,7 +1635,7 @@ void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor ggml_backend_buffer_init_tensor(buffer, tensor); } -static struct ggml_tensor * graph_dup_tensor(struct ggml_hash_set hash_set, struct ggml_tensor ** node_copies, +static struct ggml_tensor * graph_copy_dup_tensor(struct ggml_hash_set hash_set, struct ggml_tensor ** node_copies, struct 
ggml_context * ctx_allocated, struct ggml_context * ctx_unallocated, struct ggml_tensor * src) { GGML_ASSERT(src != NULL); @@ -1678,7 +1648,7 @@ static struct ggml_tensor * graph_dup_tensor(struct ggml_hash_set hash_set, stru struct ggml_tensor * dst = ggml_dup_tensor_layout(src->data && !src->view_src ? ctx_allocated : ctx_unallocated, src); if (src->view_src != NULL) { - dst->view_src = graph_dup_tensor(hash_set, node_copies, ctx_allocated, ctx_unallocated, src->view_src); + dst->view_src = graph_copy_dup_tensor(hash_set, node_copies, ctx_allocated, ctx_unallocated, src->view_src); dst->view_offs = src->view_offs; } dst->op = src->op; @@ -1691,14 +1661,14 @@ static struct ggml_tensor * graph_dup_tensor(struct ggml_hash_set hash_set, stru if (s == NULL) { break; } - dst->src[i] = graph_dup_tensor(hash_set, node_copies, ctx_allocated, ctx_unallocated, s); + dst->src[i] = graph_copy_dup_tensor(hash_set, node_copies, ctx_allocated, ctx_unallocated, s); } node_copies[id] = dst; return dst; } -static void graph_init_tensor(struct ggml_hash_set hash_set, struct ggml_tensor ** node_copies, bool * node_init, struct ggml_tensor * src) { +static void graph_copy_init_tensor(struct ggml_hash_set hash_set, struct ggml_tensor ** node_copies, bool * node_init, struct ggml_tensor * src) { size_t id = ggml_hash_find(hash_set, src); if (node_init[id]) { return; @@ -1707,7 +1677,7 @@ static void graph_init_tensor(struct ggml_hash_set hash_set, struct ggml_tensor struct ggml_tensor * dst = node_copies[id]; if (dst->view_src != NULL) { - graph_init_tensor(hash_set, node_copies, node_init, src->view_src); + graph_copy_init_tensor(hash_set, node_copies, node_init, src->view_src); ggml_backend_view_init(dst->view_src->buffer, dst); } else { @@ -1720,17 +1690,17 @@ static void graph_init_tensor(struct ggml_hash_set hash_set, struct ggml_tensor if (s == NULL) { break; } - graph_init_tensor(hash_set, node_copies, node_init, s); + graph_copy_init_tensor(hash_set, node_copies, node_init, s); 
} } struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph) { struct ggml_hash_set hash_set = { /* .size = */ graph->visited_hash_table.size, - /* .keys = */ calloc(sizeof(hash_set.keys[0]) * graph->visited_hash_table.size, 1) + /* .keys = */ calloc(sizeof(hash_set.keys[0]), graph->visited_hash_table.size) // NOLINT }; - struct ggml_tensor ** node_copies = calloc(sizeof(node_copies[0]) * hash_set.size, 1); - bool * node_init = calloc(sizeof(node_init[0]) * hash_set.size, 1); + struct ggml_tensor ** node_copies = calloc(sizeof(node_copies[0]), hash_set.size); // NOLINT + bool * node_init = calloc(sizeof(node_init[0]), hash_set.size); struct ggml_init_params params = { /* .mem_size = */ ggml_tensor_overhead()*hash_set.size + ggml_graph_overhead_custom(graph->size, false), @@ -1759,7 +1729,7 @@ struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, s // dup nodes for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; - graph_dup_tensor(hash_set, node_copies, ctx_allocated, ctx_unallocated, node); + graph_copy_dup_tensor(hash_set, node_copies, ctx_allocated, ctx_unallocated, node); } // allocate nodes @@ -1784,7 +1754,7 @@ struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, s // copy data and init views for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; - graph_init_tensor(hash_set, node_copies, node_init, node); + graph_copy_init_tensor(hash_set, node_copies, node_init, node); } // build graph copy diff --git a/ggml-backend.h b/ggml-backend.h index 282b3a9b7..f13c69bff 100644 --- a/ggml-backend.h +++ b/ggml-backend.h @@ -130,11 +130,7 @@ extern "C" { // in build_graph: build_graph(...) 
{ - // allocating tensors in a specific backend (optional, recommended: pre-allocate inputs in a different buffer) - alloc_cpu = ggml_backend_sched_get_allocr(sched, backend_cpu); - ggml_allocr_alloc(alloc_cpu, tensor); - - // manually assigning nodes to a backend (optional, shouldn't be needed in most cases) + // manually assign nodes to a backend (optional, should not be needed in most cases) struct ggml_tensor * node = ggml_mul_mat(ctx, ...); ggml_backend_sched_set_node_backend(sched, node, backend_gpu); } @@ -164,20 +160,19 @@ extern "C" { GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size); GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched); // Initialize backend buffers from a measure graph - GGML_API void ggml_backend_sched_init_measure(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph); + GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph); // Get the number of splits of the last graph GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched); - GGML_API ggml_tallocr_t ggml_backend_sched_get_tallocr(ggml_backend_sched_t sched, ggml_backend_t backend); - GGML_API ggml_backend_buffer_t ggml_backend_sched_get_buffer (ggml_backend_sched_t sched, ggml_backend_t backend); + GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend); GGML_API void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend); GGML_API ggml_backend_t ggml_backend_sched_get_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node); // Allocate and compute graph on the backend scheduler - GGML_API void ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph); + GGML_API bool ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * 
graph); - // Reset all assignments and allocators - must be called before using the sched allocators to allocate inputs + // Reset all assignments and allocators - must be called before changing the node backends GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched); // Set a callback to be called for each resulting node during graph compute diff --git a/ggml.c b/ggml.c index e45b78d7e..d921d82fe 100644 --- a/ggml.c +++ b/ggml.c @@ -2649,7 +2649,7 @@ static struct ggml_tensor * ggml_new_tensor_impl( /*.nb =*/ { 0, 0, 0, 0 }, /*.op =*/ GGML_OP_NONE, /*.op_params =*/ { 0 }, - /*.is_param =*/ false, + /*.flags =*/ 0, /*.grad =*/ NULL, /*.src =*/ { NULL }, /*.perf_runs =*/ 0, @@ -6551,7 +6551,7 @@ struct ggml_tensor * ggml_cross_entropy_loss_back( void ggml_set_param( struct ggml_context * ctx, struct ggml_tensor * tensor) { - tensor->is_param = true; + tensor->flags |= GGML_TENSOR_FLAG_PARAM; GGML_ASSERT(tensor->grad == NULL); tensor->grad = ggml_dup_tensor(ctx, tensor); @@ -15367,7 +15367,7 @@ static struct ggml_tensor * ggml_recompute_graph_node( return NULL; } - if (node->is_param) { + if (node->flags & GGML_TENSOR_FLAG_PARAM) { return node; } @@ -15401,7 +15401,7 @@ static struct ggml_tensor * ggml_recompute_graph_node( clone->op = node->op; clone->grad = node->grad; - clone->is_param = node->is_param; + clone->flags = node->flags; clone->extra = node->extra; for (int k = 0; k < GGML_MAX_DIMS; ++k) { clone->nb[k] = node->nb[k]; @@ -16433,7 +16433,7 @@ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * for (int i = 0; i < gf->n_nodes; i++) { struct ggml_tensor * node = gf->nodes[i]; - if (node->is_param) { + if (node->flags & GGML_TENSOR_FLAG_PARAM) { GGML_PRINT_DEBUG("%s: found root node %p\n", __func__, (void *) node); ggml_build_forward_expand(gb, node->grad); } @@ -17918,7 +17918,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) { GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) 
cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n", i, node->ne[0], node->ne[1], node->ne[2], - ggml_op_name(node->op), node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs, + ggml_op_name(node->op), (node->flags & GGML_TENSOR_FLAG_PARAM) ? "x" : node->grad ? "g" : " ", node->perf_runs, (double) node->perf_cycles / (double) ggml_cycles_per_ms(), (double) node->perf_cycles / (double) ggml_cycles_per_ms() / (double) node->perf_runs, (double) node->perf_time_us / 1000.0, @@ -18011,7 +18011,7 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph continue; } - if (node->is_param) { + if (node->flags & GGML_TENSOR_FLAG_PARAM) { snprintf(color, sizeof(color), "yellow"); } else if (node->grad) { if (ggml_graph_find(gf, node)) { @@ -18185,7 +18185,7 @@ static enum ggml_opt_result ggml_opt_adam( int np = 0; int64_t nx = 0; for (int i = 0; i < gf->n_nodes; ++i) { - if (gf->nodes[i]->is_param) { + if (gf->nodes[i]->flags & GGML_TENSOR_FLAG_PARAM) { GGML_PRINT_DEBUG("found param %d: grad->op = %d\n", np, gf->nodes[i]->grad->op); GGML_ASSERT(np < GGML_MAX_PARAMS); @@ -18548,7 +18548,7 @@ static enum ggml_opt_result ggml_opt_lbfgs( int np = 0; int nx = 0; for (int i = 0; i < gf->n_nodes; ++i) { - if (gf->nodes[i]->is_param) { + if (gf->nodes[i]->flags & GGML_TENSOR_FLAG_PARAM) { GGML_PRINT_DEBUG("found param %d: grad->op = %d\n", np, gf->nodes[i]->grad->op); GGML_ASSERT(np < GGML_MAX_PARAMS); @@ -19023,6 +19023,16 @@ enum ggml_opt_result ggml_opt_resume_g( //////////////////////////////////////////////////////////////////////////////// +void ggml_set_input(struct ggml_tensor * tensor) { + tensor->flags |= GGML_TENSOR_FLAG_INPUT; +} + +void ggml_set_output(struct ggml_tensor * tensor) { + tensor->flags |= GGML_TENSOR_FLAG_OUTPUT; +} + +//////////////////////////////////////////////////////////////////////////////// + void ggml_quantize_init(enum ggml_type type) { ggml_critical_section_start(); diff --git a/ggml.h b/ggml.h index 
9cfec5bac..01cecc1e1 100644 --- a/ggml.h +++ b/ggml.h @@ -505,11 +505,17 @@ extern "C" { enum ggml_log_level { GGML_LOG_LEVEL_ERROR = 2, - GGML_LOG_LEVEL_WARN = 3, - GGML_LOG_LEVEL_INFO = 4, + GGML_LOG_LEVEL_WARN = 3, + GGML_LOG_LEVEL_INFO = 4, GGML_LOG_LEVEL_DEBUG = 5 }; + enum ggml_tensor_flag { + GGML_TENSOR_FLAG_INPUT = 1, + GGML_TENSOR_FLAG_OUTPUT = 2, + GGML_TENSOR_FLAG_PARAM = 4, + }; + // ggml object struct ggml_object { size_t offs; @@ -543,7 +549,7 @@ extern "C" { // op params - allocated as int32_t for alignment int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)]; - bool is_param; + int32_t flags; struct ggml_tensor * grad; struct ggml_tensor * src[GGML_MAX_SRC]; @@ -2092,6 +2098,12 @@ extern "C" { ggml_opt_callback callback, void * callback_data); + // + // tensor flags + // + GGML_API void ggml_set_input(struct ggml_tensor * tensor); + GGML_API void ggml_set_output(struct ggml_tensor * tensor); + // // quantization // diff --git a/llama.cpp b/llama.cpp index d1ee26ce2..a5b873a7b 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1872,8 +1872,6 @@ struct llama_context { // memory buffers used to evaluate the model std::vector buf_compute_meta; ggml_backend_sched_t sched = nullptr; - // allocator for the input tensors - ggml_tallocr * alloc = nullptr; // input tensors ggml_backend_buffer_t buf_input = nullptr; @@ -7199,12 +7197,10 @@ struct llm_build_context { static struct ggml_cgraph * llama_build_graph( llama_context & lctx, - const llama_batch & batch) { + const llama_batch & batch, + bool worst_case) { const auto & model = lctx.model; - // check if we should build the worst-case graph (for memory measurement) - const bool worst_case = ggml_tallocr_is_measure(lctx.alloc); - // this callback allows us to apply custom logic to each tensor (e.g. ggml-alloc, offloading, etc.) 
llm_build_cb cb = [&](struct ggml_tensor * cur, const char * name, int il) { if (il >= 0) { @@ -7225,77 +7221,6 @@ static struct ggml_cgraph * llama_build_graph( struct llm_build_context llm(lctx, batch, cb, worst_case); - // - // set input data - // - - if (!ggml_tallocr_is_measure(lctx.alloc)) { - if (batch.token) { - const int64_t n_tokens = batch.n_tokens; - - ggml_backend_tensor_set(lctx.inp_tokens, batch.token, 0, n_tokens*ggml_element_size(lctx.inp_tokens)); - } - - if (batch.embd) { - const int64_t n_embd = llm.n_embd; - const int64_t n_tokens = batch.n_tokens; - - ggml_backend_tensor_set(lctx.inp_embd, batch.embd, 0, n_tokens*n_embd*ggml_element_size(lctx.inp_embd)); - } - - if (batch.pos) { - const int64_t n_tokens = batch.n_tokens; - - ggml_backend_tensor_set(lctx.inp_pos, batch.pos, 0, n_tokens*ggml_element_size(lctx.inp_pos)); - } - - { - const int64_t n_kv = llm.n_kv; - const int64_t n_tokens = batch.n_tokens; - - GGML_ASSERT(ggml_backend_buffer_is_host(lctx.inp_KQ_mask->buffer)); - float * data = (float *) lctx.inp_KQ_mask->data; - - for (int h = 0; h < 1; ++h) { - for (int j = 0; j < n_tokens; ++j) { - const llama_pos pos = batch.pos[j]; - const llama_seq_id seq_id = batch.seq_id[j][0]; - - for (int i = 0; i < n_kv; ++i) { - float f; - if (!lctx.kv_self.cells[i].has_seq_id(seq_id) || - (llm.causal_attn && lctx.kv_self.cells[i].pos > pos)) { - f = -INFINITY; - } else { - f = 0; - } - data[h*(n_kv*n_tokens) + j*n_kv + i] = f; - } - } - } - } - - if (llm.do_rope_shift) { - const int64_t n_ctx = llm.n_ctx; - - GGML_ASSERT(ggml_backend_buffer_is_host(lctx.inp_K_shift->buffer)); - int32_t * data = (int32_t *) lctx.inp_K_shift->data; - - for (int i = 0; i < n_ctx; ++i) { - data[i] = lctx.kv_self.cells[i].delta; - } - } - - { - GGML_ASSERT(ggml_backend_buffer_is_host(lctx.inp_sum->buffer)); - float * data = (float *) lctx.inp_sum->data; - - for (int i = 0; i < batch.n_tokens; ++i) { - data[i] = 1.0f/float(batch.n_tokens); - } - } - } - llm.init(); switch 
(model.arch) { @@ -7384,6 +7309,83 @@ static struct ggml_cgraph * llama_build_graph( return result; } +static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) { + // + // set input data + // + + const auto & hparams = lctx.model.hparams; + const auto & cparams = lctx.cparams; + const auto & kv_self = lctx.kv_self; + + if (batch.token) { + const int64_t n_tokens = batch.n_tokens; + + ggml_backend_tensor_set(lctx.inp_tokens, batch.token, 0, n_tokens*ggml_element_size(lctx.inp_tokens)); + } + + if (batch.embd) { + const int64_t n_embd = hparams.n_embd; + const int64_t n_tokens = batch.n_tokens; + + ggml_backend_tensor_set(lctx.inp_embd, batch.embd, 0, n_tokens*n_embd*ggml_element_size(lctx.inp_embd)); + } + + if (batch.pos) { + const int64_t n_tokens = batch.n_tokens; + + ggml_backend_tensor_set(lctx.inp_pos, batch.pos, 0, n_tokens*ggml_element_size(lctx.inp_pos)); + } + + { + const int64_t n_kv = kv_self.n; + const int64_t n_tokens = batch.n_tokens; + + assert(ggml_backend_buffer_is_host(lctx.inp_KQ_mask->buffer)); + + float * data = (float *) lctx.inp_KQ_mask->data; + + for (int h = 0; h < 1; ++h) { + for (int j = 0; j < n_tokens; ++j) { + const llama_pos pos = batch.pos[j]; + const llama_seq_id seq_id = batch.seq_id[j][0]; + + for (int i = 0; i < n_kv; ++i) { + float f; + if (!lctx.kv_self.cells[i].has_seq_id(seq_id) || lctx.kv_self.cells[i].pos > pos) { + f = -INFINITY; + } else { + f = 0; + } + data[h*(n_kv*n_tokens) + j*n_kv + i] = f; + } + } + } + } + + + { + assert(ggml_backend_buffer_is_host(lctx.inp_sum->buffer)); + float * data = (float *) lctx.inp_sum->data; + + for (int i = 0; i < batch.n_tokens; ++i) { + data[i] = 1.0f/float(batch.n_tokens); + } + } + + if (kv_self.has_shift) { + const int64_t n_ctx = cparams.n_ctx; + + assert(ggml_backend_buffer_is_host(lctx.inp_K_shift->buffer)); + + int32_t * data = (int32_t *) lctx.inp_K_shift->data; + + for (int i = 0; i < n_ctx; ++i) { + data[i] = lctx.kv_self.cells[i].delta; + } + } +} + // 
decode a batch of tokens by evaluating the transformer // // - lctx: llama context @@ -7482,7 +7484,7 @@ static int llama_decode_internal( ggml_backend_sched_reset(lctx.sched); ggml_backend_sched_set_eval_callback(lctx.sched, lctx.cparams.cb_eval, lctx.cparams.cb_eval_user_data); - ggml_cgraph * gf = llama_build_graph(lctx, batch); + ggml_cgraph * gf = llama_build_graph(lctx, batch, false); // the output is always the last tensor in the graph struct ggml_tensor * res = gf->nodes[gf->n_nodes - 1]; @@ -7527,6 +7529,9 @@ static int llama_decode_internal( if (lctx.backend_cpu != nullptr) { ggml_backend_cpu_set_n_threads(lctx.backend_cpu, n_threads); } + + llama_set_inputs(lctx, batch); + ggml_backend_sched_graph_compute(lctx.sched, gf); // fprintf(stderr, "splits: %d\n", ggml_backend_sched_get_n_splits(lctx.sched)); @@ -11278,23 +11283,27 @@ struct llama_context * llama_new_context_with_model( ctx->buf_compute_meta.resize(ggml_tensor_overhead()*LLAMA_MAX_NODES + ggml_graph_overhead()); ctx->sched = ggml_backend_sched_new(ctx->backends.data(), backend_buft.data(), ctx->backends.size(), LLAMA_MAX_NODES); - ctx->alloc = ggml_backend_sched_get_tallocr(ctx->sched, ctx->backend_cpu); // build worst-case graph int n_tokens = (int)std::min(cparams.n_ctx, cparams.n_batch); int n_past = cparams.n_ctx - n_tokens; llama_token token = llama_token_bos(&ctx->model); // not actually used by llama_build_graph, but required to choose between token and embedding inputs graph - ggml_cgraph * gf = llama_build_graph(*ctx, llama_batch_get_one(&token, n_tokens, n_past, 0)); + ggml_cgraph * gf = llama_build_graph(*ctx, llama_batch_get_one(&token, n_tokens, n_past, 0), true); // initialize scheduler with the worst-case graph - ggml_backend_sched_init_measure(ctx->sched, gf); - ctx->alloc = ggml_backend_sched_get_tallocr(ctx->sched, ctx->backend_cpu); + if (!ggml_backend_sched_reserve(ctx->sched, gf)) { + LLAMA_LOG_ERROR("%s: failed to allocate compute buffers\n", __func__); + llama_free(ctx); + 
return nullptr; + } - for (ggml_backend_t backend : ctx->backends) { - ggml_backend_buffer_t buf = ggml_backend_sched_get_buffer(ctx->sched, backend); + for (size_t i = 0; i < ctx->backends.size(); i++) { + ggml_backend_t backend = ctx->backends[i]; + ggml_backend_buffer_type_t buft = backend_buft[i]; + size_t size = ggml_backend_sched_get_buffer_size(ctx->sched, backend); LLAMA_LOG_INFO("%s: %10s compute buffer size = %8.2f MiB\n", __func__, - ggml_backend_buffer_name(buf), - ggml_backend_buffer_get_size(buf) / 1024.0 / 1024.0); + ggml_backend_buft_name(buft), + size / 1024.0 / 1024.0); } // note: the number of splits during measure is higher than during inference due to the kv shift diff --git a/scripts/sync-ggml.last b/scripts/sync-ggml.last index 6ae75bc31..7a23ab162 100644 --- a/scripts/sync-ggml.last +++ b/scripts/sync-ggml.last @@ -1 +1 @@ -2c7cf49810d523b9632da393a9e8270b60bf3b24 +5070f078a67c18c11736e78316ab715ca9afde16 From 4a46d2b7923be83d6019251671ee63aa1fa0d6bc Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Mon, 12 Feb 2024 09:38:44 +0100 Subject: [PATCH 90/94] llava : remove prog parameter from ArgumentParser (#5457) * llava: remove prog parameter from ArgumentParser This commit removes the `prog` parameter from `ArgumentParser` so that it uses the default value which is the name of the script. The motivation for this change is that currently the usage output looks like this: ```console $ python examples/llava/convert-image-encoder-to-gguf.py --help usage: convert_hf_to_gguf.py [-h] ... ``` And with this change it will look like this: ```console $ python examples/llava/convert-image-encoder-to-gguf.py --help usage: convert-image-encoder-to-gguf.py [-h] ... ``` Signed-off-by: Daniel Bevenius * ci: add W503 to flake8 ignore list This commit adds W503 to the ignore list for flake8. 
This is done to avoid the following error: W503 line break before binary operator Signed-off-by: Daniel Bevenius --------- Signed-off-by: Daniel Bevenius --- .github/workflows/python-lint.yml | 2 +- examples/llava/convert-image-encoder-to-gguf.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml index 56d17b66c..ea0a05ea1 100644 --- a/.github/workflows/python-lint.yml +++ b/.github/workflows/python-lint.yml @@ -16,5 +16,5 @@ jobs: - name: flake8 Lint uses: py-actions/flake8@v2 with: - ignore: "E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704" + ignore: "E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503" exclude: "examples/*,examples/*/**,*/**/__init__.py" diff --git a/examples/llava/convert-image-encoder-to-gguf.py b/examples/llava/convert-image-encoder-to-gguf.py index f5a3c9b46..e204b56be 100644 --- a/examples/llava/convert-image-encoder-to-gguf.py +++ b/examples/llava/convert-image-encoder-to-gguf.py @@ -71,7 +71,7 @@ def bytes_to_unicode(): return dict(zip(bs, cs)) -ap = argparse.ArgumentParser(prog="convert_hf_to_gguf.py") +ap = argparse.ArgumentParser() ap.add_argument("-m", "--model-dir", help="Path to model directory cloned from HF Hub", required=True) ap.add_argument("--use-f32", action="store_true", default=False, help="Use f32 instead of f16") ap.add_argument("--text-only", action="store_true", required=False, From 43fe07c1a4f3a58612e1d9543f7c6b556710f5d0 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> Date: Mon, 12 Feb 2024 20:22:05 +0530 Subject: [PATCH 91/94] ggml-sycl: Replace 3d ops with macro (#5458) * use macro * use macro * fix format --- ggml-sycl.cpp | 75 ++++++++++----------------------------------------- 1 file changed, 14 insertions(+), 61 deletions(-) diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index dd562a898..cd4b3a1e1 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -11578,11 
+11578,8 @@ static dpct::err0 ggml_sycl_cpy_tensor_2d(void *dst, } char * dst_ptr = (char *) dst; - const int64_t ne0 = src->ne[0]; - const int64_t nb0 = src->nb[0]; - const int64_t nb1 = src->nb[1]; - const int64_t nb2 = src->nb[2]; - const int64_t nb3 = src->nb[3]; + GGML_TENSOR_LOCALS_1(int64_t, ne, src, ne); + GGML_TENSOR_LOCALS(int64_t, nb, src, nb); const enum ggml_type type = src->type; const int64_t ts = ggml_type_size(type); const int64_t bs = ggml_blck_size(type); @@ -12426,9 +12423,7 @@ inline void ggml_sycl_op_alibi(const ggml_tensor *src0, const ggml_tensor *src1, GGML_ASSERT(src0->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); - const int64_t ne00 = src0->ne[0]; - const int64_t ne01 = src0->ne[1]; - const int64_t ne02 = src0->ne[2]; + GGML_TENSOR_LOCALS_3(int64_t, ne0, src0, ne); const int64_t nrows = ggml_nrows(src0); //const int n_past = ((int32_t *) dst->op_params)[0]; @@ -12758,15 +12753,9 @@ static void ggml_sycl_op_mul_mat(const ggml_tensor *src0, ggml_sycl_op_mul_mat_t op, const bool convert_src1_to_q8_1) try { - const int64_t ne00 = src0->ne[0]; - const int64_t ne01 = src0->ne[1]; - const int64_t ne02 = src0->ne[2]; - const int64_t ne03 = src0->ne[3]; + GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne); - const int64_t ne10 = src1->ne[0]; - const int64_t ne11 = src1->ne[1]; - const int64_t ne12 = src1->ne[2]; - const int64_t ne13 = src1->ne[3]; + GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne); const int64_t nrows1 = ggml_nrows(src1); GGML_ASSERT(ne03 == ne13); @@ -13337,23 +13326,13 @@ static void ggml_sycl_mul_mat_mat_batched_sycl(const ggml_tensor *src0, GGML_ASSERT(src0->type == GGML_TYPE_F16); GGML_ASSERT(src1->type == GGML_TYPE_F32); - const int64_t ne00 = src0->ne[0]; GGML_UNUSED(ne00); - const int64_t ne01 = src0->ne[1]; - const int64_t ne02 = src0->ne[2]; - const int64_t ne03 = src0->ne[3]; + GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne); - const int64_t nb01 = src0->nb[1]; - const int64_t nb02 = src0->nb[2]; GGML_UNUSED(nb02); - 
const int64_t nb03 = src0->nb[3]; GGML_UNUSED(nb03); + GGML_TENSOR_LOCALS(int64_t, nb0, src0, nb); - const int64_t ne10 = src1->ne[0]; - const int64_t ne11 = src1->ne[1]; - const int64_t ne12 = src1->ne[2]; - const int64_t ne13 = src1->ne[3]; + GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne); - const int64_t nb11 = src1->nb[1]; - const int64_t nb12 = src1->nb[2]; GGML_UNUSED(nb12); - const int64_t nb13 = src1->nb[3]; GGML_UNUSED(nb13); + GGML_TENSOR_LOCALS(int64_t, nb1, src1, nb); const int64_t ne1 = ggml_nelements(src1); const int64_t ne = ggml_nelements(dst); @@ -13655,23 +13634,15 @@ static void ggml_sycl_mul_mat_id_sycl(ggml_tensor * dst) { GGML_ASSERT(src00->backend != GGML_BACKEND_GPU_SPLIT); GGML_ASSERT(src1->type == GGML_TYPE_F32); - const int64_t ne00 = src00->ne[0]; GGML_UNUSED(ne00); - const int64_t ne01 = src00->ne[1]; - const int64_t ne02 = src00->ne[2]; - const int64_t ne03 = src00->ne[3]; + GGML_TENSOR_LOCALS(int64_t, ne0, src00, ne); //const int64_t nb01 = src00->nb[1]; - const int64_t nb02 = src00->nb[2]; GGML_UNUSED(nb02); - const int64_t nb03 = src00->nb[3]; GGML_UNUSED(nb03); + GGML_TENSOR_LOCALS(int64_t, nb0, src00, nb); - const int64_t ne10 = src1->ne[0]; - const int64_t ne11 = src1->ne[1]; - const int64_t ne12 = src1->ne[2]; - const int64_t ne13 = src1->ne[3]; + GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne); + GGML_TENSOR_LOCALS(int64_t, nb1, src1, nb); //const int64_t nb11 = src1->nb[1]; - const int64_t nb12 = src1->nb[2]; GGML_UNUSED(nb12); - const int64_t nb13 = src1->nb[3]; GGML_UNUSED(nb13); const int64_t ne1 = ggml_nelements(src1); const int64_t ne = ggml_nelements(dst); @@ -13940,25 +13911,7 @@ static void ggml_sycl_cpy(const ggml_tensor *src0, const ggml_tensor *src1, GGML_ASSERT(ggml_nbytes(src0) <= INT_MAX); GGML_ASSERT(ggml_nbytes(src1) <= INT_MAX); - const int64_t ne00 = src0->ne[0]; - const int64_t ne01 = src0->ne[1]; - const int64_t ne02 = src0->ne[2]; - - - const int64_t nb00 = src0->nb[0]; - const int64_t nb01 = src0->nb[1]; - const 
int64_t nb02 = src0->nb[2]; - const int64_t nb03 = src0->nb[3]; - - const int64_t ne10 = src1->ne[0]; - const int64_t ne11 = src1->ne[1]; - const int64_t ne12 = src1->ne[2]; - - - const int64_t nb10 = src1->nb[0]; - const int64_t nb11 = src1->nb[1]; - const int64_t nb12 = src1->nb[2]; - const int64_t nb13 = src1->nb[3]; + GGML_TENSOR_BINARY_OP_LOCALS; SYCL_CHECK(ggml_sycl_set_device(g_main_device)); dpct::queue_ptr main_stream = g_syclStreams[g_main_device_index][0]; From dbd8828eb03b9aa8d0af7e4c533d3c2f5b38aba6 Mon Sep 17 00:00:00 2001 From: Lee <44310445+lx200916@users.noreply.github.com> Date: Tue, 13 Feb 2024 01:29:57 +0800 Subject: [PATCH 92/94] py : fix persimmon `n_rot` conversion (#5460) * convert : fix persimmon official weight conversion to write correct n_rot. * Update convert-persimmon-to-gguf.py --------- Co-authored-by: Georgi Gerganov --- convert-persimmon-to-gguf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/convert-persimmon-to-gguf.py b/convert-persimmon-to-gguf.py index d2be805d1..def210531 100755 --- a/convert-persimmon-to-gguf.py +++ b/convert-persimmon-to-gguf.py @@ -88,7 +88,8 @@ def main(): gguf_writer.add_embedding_length(hidden_size) gguf_writer.add_block_count(block_count) gguf_writer.add_feed_forward_length(hparams.ffn_hidden_size) - gguf_writer.add_rope_dimension_count(hidden_size // head_count) + # ref: https://github.com/ggerganov/llama.cpp/pull/4889/commits/eea19039fc52ea2dbd1aab45b59ab4e3e29a3443 + gguf_writer.add_rope_dimension_count(hidden_size // head_count // 2) gguf_writer.add_head_count(head_count) gguf_writer.add_head_count_kv(head_count_kv) gguf_writer.add_rope_freq_base(hparams.rotary_emb_base) From df334a11251b81fd0b6a0e51e7146e0ba9e973f2 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 12 Feb 2024 19:54:29 +0200 Subject: [PATCH 93/94] swift : package no longer use ggml dependency (#5465) * Revert "swift : update Package.swift to use ggml as dependency (#4691)" This reverts commit
ece9a45e8ffb73ad461c792720c2fec28b0137bc. * spm : add ggml headers --- Package.swift | 24 +++++++++++++++++++----- spm-headers/ggml-alloc.h | 1 + spm-headers/ggml-backend.h | 1 + spm-headers/ggml.h | 1 + 4 files changed, 22 insertions(+), 5 deletions(-) create mode 120000 spm-headers/ggml-alloc.h create mode 120000 spm-headers/ggml-backend.h create mode 120000 spm-headers/ggml.h diff --git a/Package.swift b/Package.swift index 37524edee..b24c9204a 100644 --- a/Package.swift +++ b/Package.swift @@ -13,17 +13,31 @@ let package = Package( products: [ .library(name: "llama", targets: ["llama"]), ], - dependencies: [ - .package(url: "https://github.com/ggerganov/ggml.git", .branch("release")) - ], targets: [ .target( name: "llama", - dependencies: ["ggml"], path: ".", - exclude: ["ggml-metal.metal"], + exclude: [ + "cmake", + "examples", + "scripts", + "models", + "tests", + "CMakeLists.txt", + "ggml-cuda.cu", + "ggml-cuda.h", + "Makefile" + ], sources: [ + "ggml.c", "llama.cpp", + "ggml-alloc.c", + "ggml-backend.c", + "ggml-quants.c", + "ggml-metal.m", + ], + resources: [ + .process("ggml-metal.metal") ], publicHeadersPath: "spm-headers", cSettings: [ diff --git a/spm-headers/ggml-alloc.h b/spm-headers/ggml-alloc.h new file mode 120000 index 000000000..a49d385a1 --- /dev/null +++ b/spm-headers/ggml-alloc.h @@ -0,0 +1 @@ +../ggml-alloc.h \ No newline at end of file diff --git a/spm-headers/ggml-backend.h b/spm-headers/ggml-backend.h new file mode 120000 index 000000000..17c2cf14f --- /dev/null +++ b/spm-headers/ggml-backend.h @@ -0,0 +1 @@ +../ggml-backend.h \ No newline at end of file diff --git a/spm-headers/ggml.h b/spm-headers/ggml.h new file mode 120000 index 000000000..39215298f --- /dev/null +++ b/spm-headers/ggml.h @@ -0,0 +1 @@ +../ggml.h \ No newline at end of file From 099afc6274c859ca67146e725839f2d97a5ef313 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 12 Feb 2024 20:14:39 +0200 Subject: [PATCH 94/94] llama : fix quantization when tensors are 
missing (#5423) --- llama.cpp | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/llama.cpp b/llama.cpp index a5b873a7b..d316d067b 100644 --- a/llama.cpp +++ b/llama.cpp @@ -772,22 +772,37 @@ struct LLM_TN { llm_arch arch; std::string operator()(llm_tensor tensor) const { + if (LLM_TENSOR_NAMES[arch].find(tensor) == LLM_TENSOR_NAMES[arch].end()) { + return "__missing__"; + } return LLM_TENSOR_NAMES[arch].at(tensor); } std::string operator()(llm_tensor tensor, const std::string & suffix) const { + if (LLM_TENSOR_NAMES[arch].find(tensor) == LLM_TENSOR_NAMES[arch].end()) { + return "__missing__"; + } return LLM_TENSOR_NAMES[arch].at(tensor) + "." + suffix; } std::string operator()(llm_tensor tensor, int bid) const { + if (LLM_TENSOR_NAMES[arch].find(tensor) == LLM_TENSOR_NAMES[arch].end()) { + return "__missing__"; + } return ::format(LLM_TENSOR_NAMES[arch].at(tensor).c_str(), bid); } std::string operator()(llm_tensor tensor, const std::string & suffix, int bid) const { + if (LLM_TENSOR_NAMES[arch].find(tensor) == LLM_TENSOR_NAMES[arch].end()) { + return "__missing__"; + } return ::format(LLM_TENSOR_NAMES[arch].at(tensor).c_str(), bid) + "." + suffix; } std::string operator()(llm_tensor tensor, const std::string & suffix, int bid, int xid) const { + if (LLM_TENSOR_NAMES[arch].find(tensor) == LLM_TENSOR_NAMES[arch].end()) { + return "__missing__"; + } return ::format(LLM_TENSOR_NAMES[arch].at(tensor).c_str(), bid, xid) + "." 
+ suffix; } }; @@ -10227,6 +10242,7 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty } ++qs.i_ffn_up; } + // if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K) new_type = GGML_TYPE_Q3_K; //} // IK: let's remove this, else Q2_K is almost the same as Q3_K_S @@ -10286,19 +10302,19 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s // K-quants case LLAMA_FTYPE_MOSTLY_Q2_K_S: - case LLAMA_FTYPE_MOSTLY_Q2_K: quantized_type = GGML_TYPE_Q2_K; break; + case LLAMA_FTYPE_MOSTLY_Q2_K: quantized_type = GGML_TYPE_Q2_K; break; case LLAMA_FTYPE_MOSTLY_Q3_K_XS: case LLAMA_FTYPE_MOSTLY_Q3_K_S: case LLAMA_FTYPE_MOSTLY_Q3_K_M: - case LLAMA_FTYPE_MOSTLY_Q3_K_L: quantized_type = GGML_TYPE_Q3_K; break; + case LLAMA_FTYPE_MOSTLY_Q3_K_L: quantized_type = GGML_TYPE_Q3_K; break; case LLAMA_FTYPE_MOSTLY_Q4_K_S: - case LLAMA_FTYPE_MOSTLY_Q4_K_M: quantized_type = GGML_TYPE_Q4_K; break; + case LLAMA_FTYPE_MOSTLY_Q4_K_M: quantized_type = GGML_TYPE_Q4_K; break; case LLAMA_FTYPE_MOSTLY_Q5_K_S: - case LLAMA_FTYPE_MOSTLY_Q5_K_M: quantized_type = GGML_TYPE_Q5_K; break; - case LLAMA_FTYPE_MOSTLY_Q6_K: quantized_type = GGML_TYPE_Q6_K; break; - case LLAMA_FTYPE_MOSTLY_IQ2_XXS:quantized_type = GGML_TYPE_IQ2_XXS; break; - case LLAMA_FTYPE_MOSTLY_IQ2_XS :quantized_type = GGML_TYPE_IQ2_XS; break; - case LLAMA_FTYPE_MOSTLY_IQ3_XXS:quantized_type = GGML_TYPE_IQ3_XXS; break; + case LLAMA_FTYPE_MOSTLY_Q5_K_M: quantized_type = GGML_TYPE_Q5_K; break; + case LLAMA_FTYPE_MOSTLY_Q6_K: quantized_type = GGML_TYPE_Q6_K; break; + case LLAMA_FTYPE_MOSTLY_IQ2_XXS: quantized_type = GGML_TYPE_IQ2_XXS; break; + case LLAMA_FTYPE_MOSTLY_IQ2_XS: quantized_type = GGML_TYPE_IQ2_XS; break; + case LLAMA_FTYPE_MOSTLY_IQ3_XXS: quantized_type = GGML_TYPE_IQ3_XXS; break; default: throw std::runtime_error(format("invalid output file type %d\n", ftype)); }