minor : clean-up some warnings and style (#5094)

* minor : clean-up some warnings and style

ggml-ci

* ggml : add comment
Georgi Gerganov 2024-01-23 14:12:57 +02:00 committed by GitHub
parent 2bed4aa3f3
commit 89758723c7
6 changed files with 42 additions and 53 deletions

common/common.cpp

@@ -216,12 +216,10 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             }
             // store the external file name in params
             params.prompt_file = argv[i];
-            file.seekg(0, std::ios::end);
-            size_t size = file.tellg();
-            file.seekg(0, std::ios::beg);
-            params.prompt.resize(size);
-            file.read((char *)params.prompt.data(), size);
-            fprintf(stderr, "Read %zu bytes from binary file %s\n", size, argv[i]);
+            std::ostringstream ss;
+            ss << file.rdbuf();
+            params.prompt = ss.str();
+            fprintf(stderr, "Read %zu bytes from binary file %s\n", params.prompt.size(), argv[i]);
         } else if (arg == "-f" || arg == "--file") {
             if (++i >= argc) {
                 invalid_param = true;
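Note: the std::ostringstream + rdbuf() pattern adopted above reads an entire stream in one expression, replacing the manual seekg/tellg/resize/read dance. A minimal standalone sketch (the file name is hypothetical):

    #include <cstdio>
    #include <fstream>
    #include <sstream>
    #include <string>

    int main() {
        std::ifstream file("prompt.bin", std::ios::binary); // hypothetical input file
        if (!file) {
            fprintf(stderr, "failed to open file\n");
            return 1;
        }
        std::ostringstream ss;
        ss << file.rdbuf();            // copy the whole stream buffer
        std::string prompt = ss.str(); // no explicit size bookkeeping needed
        fprintf(stderr, "Read %zu bytes\n", prompt.size());
        return 0;
    }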

examples/llava/clip.cpp

@@ -2,18 +2,6 @@
 // so there might be still unnecessary artifacts hanging around
 // I'll gradually clean and extend it
 
-#include <cassert>
-#include <cmath>
-#include <cstdlib>
-#include <cstring>
-#include <fstream>
-#include <iostream>
-#include <map>
-#include <regex>
-#include <stdexcept>
-#include <vector>
-#include <sstream>
-
 #include "clip.h"
 #include "ggml.h"
 #include "ggml-alloc.h"
@@ -30,6 +18,19 @@
 #define STB_IMAGE_IMPLEMENTATION
 #include "stb_image.h"
 
+#include <cassert>
+#include <cmath>
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <regex>
+#include <stdexcept>
+#include <vector>
+#include <sstream>
+#include <cinttypes>
+
 static std::string format(const char * fmt, ...) {
     va_list ap;
     va_list ap2;
@@ -217,9 +218,9 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
 
 static void print_tensor_info(const ggml_tensor* tensor, const char* prefix = "") {
     size_t tensor_size = ggml_nbytes(tensor);
-    printf("%s: n_dims = %d, name = %s, tensor_size=%zu, shape:[%d, %d, %d, %d], type: %d\n",
+    printf("%s: n_dims = %d, name = %s, tensor_size=%zu, shape:[%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 "], type = %s\n",
            prefix, ggml_n_dims(tensor), tensor->name, tensor_size,
-           tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3], tensor->type);
+           tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3], ggml_type_name(tensor->type));
 }
 
 static projector_type clip_projector_type_from_string(const std::string & name) {
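Note: tensor->ne[] elements are int64_t, so the old %d specifiers were undefined behavior wherever int is 32-bit; the PRId64 macros from <cinttypes> (newly included above) expand to the correct specifier. A minimal sketch:

    #include <cinttypes>
    #include <cstdio>

    int main() {
        int64_t ne[4] = {24, 24, 2048, 1};
        // PRId64 expands to the right conversion specifier for int64_t (e.g. "lld" or "ld")
        printf("shape:[%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 "]\n",
               ne[0], ne[1], ne[2], ne[3]);
        return 0;
    }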
@@ -592,7 +593,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
             mlp_3 = ggml_cont(ctx0, ggml_permute(ctx0, mlp_3, 1, 0, 2, 3));
             mlp_3 = ggml_reshape_4d(ctx0, mlp_3, n_patch, n_patch, mlp_3->ne[1], mlp_3->ne[2]);
             // stride = 1, padding = 1, bias is nullptr
-            block_1 = ggml_conv_depthwise_2d(ctx0, model.mm_model_block_1_block_0_0_w, mlp_3, nullptr, 1, 1, 1, 1, 1, 1);
+            block_1 = ggml_conv_depthwise_2d(ctx0, model.mm_model_block_1_block_0_0_w, mlp_3, 1, 1, 1, 1, 1, 1);
 
             // layer norm
             // // block_1 shape = [1, 2048, 24, 24], ne = [24, 24, 2048, 1]
@@ -640,7 +641,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
             // block_2
             {
                 // stride = 2
-                block_1 = ggml_conv_depthwise_2d(ctx0, model.mm_model_block_2_block_0_0_w, block_1, nullptr, 2, 2, 1, 1, 1, 1);
+                block_1 = ggml_conv_depthwise_2d(ctx0, model.mm_model_block_2_block_0_0_w, block_1, 2, 2, 1, 1, 1, 1);
 
                 // block_1 shape = [1, 2048, 12, 12], ne = [12, 12, 2048, 1]
                 // layer norm
@@ -741,18 +742,10 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     {
         std::map<enum ggml_type, uint32_t> n_type;
 
-        uint32_t n_type_max = 0;
-        enum ggml_type type_max = GGML_TYPE_F32;
-
         for (int i = 0; i < n_tensors; i++) {
             enum ggml_type type = gguf_get_tensor_type(ctx, i);
 
             n_type[type]++;
-
-            if (n_type_max < n_type[type]) {
-                n_type_max = n_type[type];
-                type_max   = type;
-            }
         }
 
         printf("%s: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n", __func__);
@@ -795,14 +788,12 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
             size_t tensor_size = ggml_nbytes(cur);
             buffer_size += tensor_size;
             if (verbosity >= 3) {
-                printf("%s: tensor[%d]: n_dims = %d, name = %s, tensor_size=%zu, offset=%zu, shape:[%d, %d, %d, %d], type: %d\n", __func__, i,
-                       ggml_n_dims(cur), cur->name, tensor_size, offset, cur->ne[0], cur->ne[1], cur->ne[2], cur->ne[3], type);
+                printf("%s: tensor[%d]: n_dims = %d, name = %s, tensor_size=%zu, offset=%zu, shape:[%" PRIu64 ", %" PRIu64 ", %" PRIu64 ", %" PRIu64 "], type = %s\n",
+                       __func__, i, ggml_n_dims(cur), cur->name, tensor_size, offset, cur->ne[0], cur->ne[1], cur->ne[2], cur->ne[3], ggml_type_name(type));
             }
         }
     }
 
     buffer_size += n_tensors * 128 /* CLIP PADDING */;
 
     clip_ctx * new_clip = new clip_ctx;

examples/perplexity/perplexity.cpp

@@ -1202,11 +1202,11 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) {
     printf("Final Winogrande score(%d tasks): %.4lf +/- %.4lf\n", n_done, 100*p, sigma);
 }
 
-static bool deserialize_string(std::istream& in, std::string& str) {
+static bool deserialize_string(std::istream & in, std::string & str) {
     uint32_t size;
     if (!in.read((char *)&size, sizeof(size)).fail()) {
         str.resize(size);
-        if (!in.read((char *)str.data(), size).fail()) return true;
+        if (!in.read((char *)&str[0], size).fail()) return true;
     }
     return false;
 }
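Note: deserialize_string reads a length-prefixed string: a uint32_t byte count followed by the raw bytes, with &str[0] giving a writable pointer into the resized buffer. A round-trip sketch; the serialize_string counterpart here is hypothetical, for illustration only:

    #include <cstdint>
    #include <cstdio>
    #include <iostream>
    #include <sstream>
    #include <string>

    // hypothetical writer: uint32_t length prefix + raw bytes
    static void serialize_string(std::ostream & out, const std::string & str) {
        const uint32_t size = (uint32_t) str.size();
        out.write((const char *)&size, sizeof(size));
        out.write(str.data(), size);
    }

    static bool deserialize_string(std::istream & in, std::string & str) {
        uint32_t size;
        if (!in.read((char *)&size, sizeof(size)).fail()) {
            str.resize(size);
            if (!in.read((char *)&str[0], size).fail()) return true;
        }
        return false;
    }

    int main() {
        std::stringstream ss;
        serialize_string(ss, "winogrande");
        std::string out;
        if (deserialize_string(ss, out)) {
            printf("round-trip: %s\n", out.c_str());
        }
        return 0;
    }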

ggml.c

@@ -5368,14 +5368,12 @@ struct ggml_tensor * ggml_conv_depthwise_2d(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b,
-        struct ggml_tensor * c,
         int s0,
         int s1,
         int p0,
         int p1,
         int d0,
         int d1) {
     struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
     struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
                                               ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
@@ -9991,7 +9989,7 @@ static void ggml_compute_forward_mul_mat(
                 return;
             }
 
-            const int64_t tgemm0 = ggml_perf_time_us();
+            //const int64_t tgemm0 = ggml_perf_time_us();
             for (int64_t i13 = 0; i13 < ne13; i13++) {
                 for (int64_t i12 = 0; i12 < ne12; i12++) {
                     const int64_t i03 = i13/r3;
@@ -16934,7 +16932,10 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threads) {
                 if (ggml_compute_forward_mul_mat_use_blas(node)) {
                     if (node->src[0]->type != GGML_TYPE_F32) {
                         // here we need memory for fully dequantized matrix from src0
-                        cur = ggml_type_size(GGML_TYPE_F32)*ggml_nelements(node->src[0]);
+                        // take into account that src0 can be broadcasted into src1[2,3]
+                        cur = ggml_type_size(GGML_TYPE_F32)
+                            * node->src[0]->ne[0]*node->src[0]->ne[1]
+                            * node->src[1]->ne[2]*node->src[1]->ne[3];
                     }
                 } else
 #endif
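Note: the old formula sized the work buffer for a single dequantized copy of src0, but in the BLAS path src0 can be broadcast across the src1[2,3] batch dimensions, and each broadcast slice needs its own dequantized F32 copy. A worked sizing example with hypothetical shapes:

    #include <cstdint>
    #include <cstdio>

    int main() {
        // hypothetical: 2D quantized weight broadcast over 4*2 = 8 batch slices of src1
        const int64_t src0_ne[4] = {4096, 4096, 1, 1};
        const int64_t src1_ne[4] = { 512, 4096, 4, 2};
        const size_t  f32_size   = 4; // ggml_type_size(GGML_TYPE_F32)

        // old: ggml_type_size(GGML_TYPE_F32)*ggml_nelements(src0)
        const size_t old_cur = f32_size * src0_ne[0]*src0_ne[1]*src0_ne[2]*src0_ne[3];

        // new: one dequantized copy per broadcast slice src1[2,3]
        const size_t new_cur = f32_size * src0_ne[0]*src0_ne[1] * src1_ne[2]*src1_ne[3];

        printf("old = %zu bytes, new = %zu bytes (8x larger)\n", old_cur, new_cur);
        return 0;
    }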

ggml.h

@@ -1499,7 +1499,6 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             struct ggml_tensor * b,
-            struct ggml_tensor * c,
             int s0,
             int s1,
             int p0,
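Note: with the unused bias tensor c removed, callers pass the kernel, the input, and the stride/padding/dilation parameters directly, as in the clip.cpp call sites above. A minimal sketch of the updated signature (tensor shapes are hypothetical):

    #include "ggml.h"

    // depthwise 3x3 conv over a [24, 24, 2048, 1] input: stride 1, padding 1, dilation 1
    static struct ggml_tensor * depthwise_example(struct ggml_context * ctx) {
        struct ggml_tensor * kernel = ggml_new_tensor_4d(ctx, GGML_TYPE_F32,  3,  3, 2048, 1);
        struct ggml_tensor * input  = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 24, 24, 2048, 1);
        // no bias argument between the input and the stride parameters anymore
        return ggml_conv_depthwise_2d(ctx, kernel, input,
                                      /*s0*/ 1, /*s1*/ 1,
                                      /*p0*/ 1, /*p1*/ 1,
                                      /*d0*/ 1, /*d1*/ 1);
    }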