mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-11 19:21:46 +00:00
minor : warning fixes
This commit is contained in:
parent
2c9380dd2f
commit
4f9c43e3bd
@ -354,7 +354,7 @@ int main(int argc, char ** argv) {
|
|||||||
if ((int)embd.size() > max_embd_size) {
|
if ((int)embd.size() > max_embd_size) {
|
||||||
auto skipped_tokens = embd.size() - max_embd_size;
|
auto skipped_tokens = embd.size() - max_embd_size;
|
||||||
console_set_color(con_st, CONSOLE_COLOR_ERROR);
|
console_set_color(con_st, CONSOLE_COLOR_ERROR);
|
||||||
printf("<<input too long: skipped %" PRIu64 " token%s>>", skipped_tokens, skipped_tokens != 1 ? "s" : "");
|
printf("<<input too long: skipped %zu token%s>>", skipped_tokens, skipped_tokens != 1 ? "s" : "");
|
||||||
console_set_color(con_st, CONSOLE_COLOR_DEFAULT);
|
console_set_color(con_st, CONSOLE_COLOR_DEFAULT);
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
embd.resize(max_embd_size);
|
embd.resize(max_embd_size);
|
||||||
|
27
ggml-metal.m
27
ggml-metal.m
@ -256,10 +256,10 @@ bool ggml_metal_add_buffer(
|
|||||||
if (ctx->buffers[ctx->n_buffers].metal == nil) {
|
if (ctx->buffers[ctx->n_buffers].metal == nil) {
|
||||||
fprintf(stderr, "%s: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, aligned_size / 1024.0 / 1024.0);
|
fprintf(stderr, "%s: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, aligned_size / 1024.0 / 1024.0);
|
||||||
return false;
|
return false;
|
||||||
} else {
|
|
||||||
fprintf(stderr, "%s: allocated '%-16s' buffer, size = %8.2f MB\n", __func__, name, aligned_size / 1024.0 / 1024.0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fprintf(stderr, "%s: allocated '%-16s' buffer, size = %8.2f MB\n", __func__, name, aligned_size / 1024.0 / 1024.0);
|
||||||
|
|
||||||
++ctx->n_buffers;
|
++ctx->n_buffers;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -765,18 +765,23 @@ void ggml_metal_graph_compute(
|
|||||||
} break;
|
} break;
|
||||||
case GGML_OP_ALIBI:
|
case GGML_OP_ALIBI:
|
||||||
{
|
{
|
||||||
GGML_ASSERT((src0t == GGML_TYPE_F32));
|
|
||||||
const int n_past = ((int32_t *) src1->data)[0];
|
|
||||||
const int n_head = ((int32_t *) src1->data)[1];
|
|
||||||
const float max_bias = ((float *) src1->data)[2];
|
|
||||||
if (__builtin_popcount(n_head) != 1) {
|
|
||||||
GGML_ASSERT(false && "only power-of-two n_head implemented");
|
|
||||||
}
|
|
||||||
const int n_heads_log2_floor = 1 << (int) floor(log2(n_head));
|
|
||||||
const float m0 = powf(2.0f, -(max_bias) / n_heads_log2_floor);
|
|
||||||
if (encoder == nil) {
|
if (encoder == nil) {
|
||||||
encoder = [command_buffer computeCommandEncoder];
|
encoder = [command_buffer computeCommandEncoder];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GGML_ASSERT((src0t == GGML_TYPE_F32));
|
||||||
|
|
||||||
|
const int n_past = ((int32_t *) src1->data)[0]; UNUSED(n_past);
|
||||||
|
const int n_head = ((int32_t *) src1->data)[1];
|
||||||
|
const float max_bias = ((float *) src1->data)[2];
|
||||||
|
|
||||||
|
if (__builtin_popcount(n_head) != 1) {
|
||||||
|
GGML_ASSERT(false && "only power-of-two n_head implemented");
|
||||||
|
}
|
||||||
|
|
||||||
|
const int n_heads_log2_floor = 1 << (int) floor(log2(n_head));
|
||||||
|
const float m0 = powf(2.0f, -(max_bias) / n_heads_log2_floor);
|
||||||
|
|
||||||
[encoder setComputePipelineState:ctx->pipeline_alibi_f32];
|
[encoder setComputePipelineState:ctx->pipeline_alibi_f32];
|
||||||
[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
|
[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
|
||||||
[encoder setBuffer:id_dst offset:offs_dst atIndex:1];
|
[encoder setBuffer:id_dst offset:offs_dst atIndex:1];
|
||||||
|
Loading…
Reference in New Issue
Block a user