english : use `typos` to fix comments and logs (#4354)

Commit 9494d7c477 (parent 6138963fb2)
Author: Richard Kiss
Date: 2023-12-12 01:53:36 -08:00, committed by GitHub
17 changed files with 34 additions and 34 deletions


@@ -61,13 +61,13 @@
// #define LOG_TARGET stderr
// #include "log.h"
//
-// The log target can also be redirected to a diffrent function
+// The log target can also be redirected to a different function
// like so:
//
-// #define LOG_TARGET log_handler_diffrent()
+// #define LOG_TARGET log_handler_different()
// #include "log.h"
//
-// FILE* log_handler_diffrent()
+// FILE* log_handler_different()
// {
//     return stderr;
// }
@@ -421,7 +421,7 @@ inline FILE *log_handler2_impl(bool change = false, LogTriState append = LogTriS
// Disables logs entirely at runtime.
// Makes LOG() and LOG_TEE() produce no output,
-// untill enabled back.
+// until enabled back.
#define log_disable() log_disable_impl()
// INTERNAL, DO NOT USE
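
The header comment above documents how the log target can be redirected and how logging is disabled at runtime. A minimal usage sketch using only the pieces shown in this hunk (LOG_TARGET, LOG(), log_disable()); the handler name my_log_target and the main() wrapper are illustrative, not part of the library:

    #include <cstdio>

    FILE * my_log_target();             // hypothetical handler, declared before the log macros are used

    #define LOG_TARGET my_log_target()  // must be defined before including log.h
    #include "log.h"

    // route all LOG() output to stderr instead of the default target
    FILE * my_log_target() {
        return stderr;
    }

    int main() {
        LOG("this goes to the redirected target\n");
        log_disable();                  // from here on, LOG() and LOG_TEE() produce no output
        LOG("this line is suppressed\n");
        return 0;
    }
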


@@ -585,7 +585,7 @@ def merge_multifile_models(models_plus: list[ModelPlus]) -> ModelPlus:
if any("model.embed_tokens.weight" in mp.model for mp in models_plus):
# Transformers models put different tensors in different files, but
-# don't split indivdual tensors between files.
+# don't split individual tensors between files.
model: LazyModel = {}
for mp in models_plus:
model.update(mp.model)
@@ -678,7 +678,7 @@ class LazyUnpickler(pickle.Unpickler):
return func(*args)
CLASSES: dict[tuple[str, str], Any] = {
-# getattr used here as a workaround for mypy not being smart enough to detrmine
+# getattr used here as a workaround for mypy not being smart enough to determine
# the staticmethods have a __func__ attribute.
('torch._tensor', '_rebuild_from_type_v2'): getattr(rebuild_from_type_v2, '__func__'),
('torch._utils', '_rebuild_tensor_v2'): getattr(lazy_rebuild_tensor_v2, '__func__'),


@@ -739,7 +739,7 @@ bool clip_image_preprocess(const clip_ctx * ctx, const clip_image_u8 * img, clip
temp->ny = longer_side;
temp->size = 3 * longer_side * longer_side;
temp->data = new uint8_t[temp->size]();
-uint8_t bc[3] = {122, 116, 104}; // bakground color in RGB from LLaVA
+uint8_t bc[3] = {122, 116, 104}; // background color in RGB from LLaVA
// fill with background color
for (size_t i = 0; i < temp->size; i++) {


@@ -51,7 +51,7 @@ def bytes_to_unicode():
The reversible bpe codes work on unicode strings.
This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
-This is a signficant percentage of your normal, say, 32K bpe vocab.
+This is a significant percentage of your normal, say, 32K bpe vocab.
To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
And avoids mapping to whitespace/control characters the bpe code barfs on.
"""


@@ -1,6 +1,6 @@
# llama.cpp/examples/lookahead
-Demonstartion of lookahead decoding technique:
+Demonstration of lookahead decoding technique:
https://lmsys.org/blog/2023-11-21-lookahead-decoding/


@@ -11227,7 +11227,7 @@ class binary_reader
}
if (is_ndarray) // ndarray dimensional vector can only contain integers, and can not embed another array
{
-return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read, exception_message(input_format, "ndarray dimentional vector is not allowed", "size"), nullptr));
+return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read, exception_message(input_format, "ndarray dimensional vector is not allowed", "size"), nullptr));
}
std::vector<size_t> dim;
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_ndarray_size(dim)))


@@ -114,7 +114,7 @@ export async function* llama(prompt, params = {}, config = {}) {
return content;
}
-// Call llama, return an event target that you can subcribe to
+// Call llama, return an event target that you can subscribe to
//
// Example:
//


@@ -238,7 +238,7 @@
cache_prompt: true
})
-/* START: Support for storing prompt templates and parameters in borwser LocalStorage */
+/* START: Support for storing prompt templates and parameters in browsers LocalStorage */
const local_storage_storageKey = "llamacpp_server_local_storage";
@@ -282,7 +282,7 @@
let importedTemplates = local_storage_getDataAsObject('user_templates')
if (importedTemplates) {
-// saved templates were successfuly imported.
+// saved templates were successfully imported.
console.log('Processing saved templates and updating default template')
params.value = { ...params.value, image_data: [] };
@@ -303,7 +303,7 @@
}
function userTemplateResetToDefault() {
-console.log('Reseting themplate to default')
+console.log('Resetting template to default')
selectedUserTemplate.value.name = 'default';
selectedUserTemplate.value.data = savedUserTemplates.value['default'];
}


@@ -1,6 +1,6 @@
# llama.cpp/examples/speculative
-Demonstartion of speculative decoding and tree-based speculative decoding techniques
+Demonstration of speculative decoding and tree-based speculative decoding techniques
More info:


@@ -428,7 +428,7 @@ int main(int argc, char ** argv) {
++n_past_tgt;
}
-// the first token is always proposed by the traget model before the speculation loop so we erase it here
+// the first token is always proposed by the target model before the speculation loop so we erase it here
for (int s = 0; s < n_seq_dft; ++s) {
if (!drafts[s].active) {
continue;


@@ -43,7 +43,7 @@ GGML_API size_t ggml_allocr_alloc_graph(ggml_allocr_t alloc, struct ggml_cgraph
// ggml-backend v2 API
//
-// Seperate tensor and graph allocator objects
+// Separate tensor and graph allocator objects
// This is necessary for multi-backend allocation because the graph allocator needs to use multiple tensor allocators
// The original API is kept as a wrapper around the new API


@@ -3114,7 +3114,7 @@ void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void * restri
size_t vl = __riscv_vsetvl_e8m1(qk/2);
-// These tempory registers are for masking and shift operations
+// These temporary registers are for masking and shift operations
vuint32m2_t vt_1 = __riscv_vid_v_u32m2(vl);
vuint32m2_t vt_2 = __riscv_vsll_vv_u32m2(__riscv_vmv_v_x_u32m2(1, vl), vt_1, vl);
@@ -4757,7 +4757,7 @@ void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restri
vl = 16;
-// retreive lane to multiply with scale
+// retrieve lane to multiply with scale
vint32m2_t aux0_0 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a0, 0), (scale[0]), vl);
vint32m2_t aux0_1 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a0, 1), (scale[1]), vl);
vint32m2_t aux1_0 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a1, 0), (scale[2]), vl);

ggml.c

@@ -1,4 +1,4 @@
-#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
+#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnings on Windows
#define _USE_MATH_DEFINES // For M_PI on MSVC
#include "ggml-impl.h"
@@ -33,7 +33,7 @@
// we should just be careful :)
#pragma warning(disable: 4244 4267)
-// disable POSIX deprecation warnigns
+// disable POSIX deprecation warnings
// these functions are never going away, anyway
#pragma warning(disable: 4996)
#endif
@@ -1760,7 +1760,7 @@ static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size
static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
// WARN:
-// Mis-confguration can lead to problem that's hard to reason about:
+// Mis-configuration can lead to problem that's hard to reason about:
// * At best it crash or talks nosense.
// * At worst it talks slightly difference but hard to perceive.
//
@@ -7520,7 +7520,7 @@ static void ggml_compute_forward_acc_f32(
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
// view src0 and dst with these strides and data offset inbytes during acc
-// nb0 is implicitely element_size because src0 and dst are contiguous
+// nb0 is implicitly element_size because src0 and dst are contiguous
size_t nb1 = ((int32_t *) dst->op_params)[0];
size_t nb2 = ((int32_t *) dst->op_params)[1];
size_t nb3 = ((int32_t *) dst->op_params)[2];
@@ -10161,7 +10161,7 @@ static void ggml_compute_forward_set_f32(
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
// view src0 and dst with these strides and data offset inbytes during set
-// nb0 is implicitely element_size because src0 and dst are contiguous
+// nb0 is implicitly element_size because src0 and dst are contiguous
size_t nb1 = ((int32_t *) dst->op_params)[0];
size_t nb2 = ((int32_t *) dst->op_params)[1];
size_t nb3 = ((int32_t *) dst->op_params)[2];
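
Both hunks above describe addressing a tensor view through byte strides (nb1, nb2, nb3 read from op_params) plus a byte offset, with nb0 implicitly equal to the element size. A generic sketch of that address arithmetic, independent of ggml's actual op_params layout; the helper name and parameter list are illustrative only:

    #include <cstddef>

    // compute the address of element (i0, i1, i2, i3) of an f32 view described by
    // byte strides nb1/nb2/nb3 and a byte offset; nb0 is implicitly sizeof(float)
    // because the innermost dimension is contiguous
    static float * view_element(char * data, size_t offset,
                                size_t i0, size_t i1, size_t i2, size_t i3,
                                size_t nb1, size_t nb2, size_t nb3) {
        return (float *)(data + offset + i3*nb3 + i2*nb2 + i1*nb1 + i0*sizeof(float));
    }
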
@@ -14475,7 +14475,7 @@ void ggml_build_backward_gradient_checkpointing(
// insert new tensors recomputing src, reusing already made replacements,
// remember replacements: remember new tensors with mapping from corresponding gf nodes
// recurse for input tensors,
-// unless (i.e. terminating when) input tensors are replacments (like checkpoints)
+// unless (i.e. terminating when) input tensors are replacements (like checkpoints)
node->src[k] = ggml_recompute_graph_node(ctx, gf, replacements, node->src[k]);
}
// insert rewritten backward node with replacements made into resulting backward graph gb


@@ -61,7 +61,7 @@ If you want to publish the package manually for any reason, you need to have `tw
pip install build twine
```
-Then, folow these steps to release a new version:
+Then, follow these steps to release a new version:
1. Bump the version in `pyproject.toml`.
2. Build the package:


@@ -2758,7 +2758,7 @@ static void llm_load_vocab(
// The assumption is, since special tokens aren't meant to be exposed to end user, they are designed
// to be unmatchable by the tokenizer, therefore tokens from the vocab, which are unmatchable by the tokenizer
// are special tokens.
-// From testing, this appears to corelate 1:1 with special tokens.
+// From testing, this appears to correlate 1:1 with special tokens.
//
// Counting special tokens and verifying in only one direction
@@ -5846,7 +5846,7 @@ static int llama_decode_internal(
const int64_t n_embd = hparams.n_embd;
const int64_t n_vocab = hparams.n_vocab;
-// helpers for smoother batch API transistion
+// helpers for smoother batch API transition
// after deprecating the llama_eval calls, these will be removed
std::vector<llama_pos> pos;
@@ -6625,12 +6625,12 @@ static void tokenizer_st_partition(const llama_vocab & vocab, std::forward_list<
// loop over the text
while (true) {
-// find the first occurence of a given special token in this fragment
+// find the first occurrence of a given special token in this fragment
// passing offset argument only limit the "search area" but match coordinates
// are still relative to the source full raw_text
auto match = raw_text->find(special_token, raw_text_base_offset);
-// no occurences found, stop processing this fragment for a given special token
+// no occurrences found, stop processing this fragment for a given special token
if (match == std::string::npos) break;
// check if match is within bounds of offset <-> length
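
The comment in this hunk makes a subtle point: the offset passed to find() only moves the start of the search, while the returned index stays relative to the whole string. A standalone sketch of that scanning pattern (not the tokenizer_st_partition code itself; the helper name is illustrative):

    #include <string>
    #include <vector>

    // return the absolute positions of every occurrence of `token` in `text`;
    // the offset only advances the search start, the match index is always
    // relative to the full string
    static std::vector<size_t> find_all(const std::string & text, const std::string & token) {
        std::vector<size_t> matches;
        size_t offset = 0;
        while (true) {
            const size_t match = text.find(token, offset);
            if (match == std::string::npos) {
                break;  // no occurrences left
            }
            matches.push_back(match);
            offset = match + token.size();  // continue searching after this occurrence
        }
        return matches;
    }
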
@@ -7829,7 +7829,7 @@ struct llama_beam_search_data {
}
// Min-heaps are used to efficiently collect the top-k elements (k=n_beams).
-// The repetative patterns below reflect the 2 stages of heaps:
+// The repetitive patterns below reflect the 2 stages of heaps:
// * Gather elements until the vector is full, then call std::make_heap() on it.
// * If the heap is full and a new element is found that should be included, pop the
// least element to the back(), replace it with the new, then push it into the heap.
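
The two-stage heap pattern described in this comment is easy to get wrong, so here is a self-contained sketch of it on plain floats; the names and types are illustrative and not the llama_beam_search_data members:

    #include <algorithm>
    #include <functional>
    #include <vector>

    // keep the k largest candidates seen so far; a min-heap (std::greater) leaves the
    // smallest kept element at front() so it can be evicted cheaply
    static void collect_top_k(std::vector<float> & heap, size_t k, float candidate) {
        if (heap.size() < k) {
            // stage 1: gather elements until the vector is full, then heapify once
            heap.push_back(candidate);
            if (heap.size() == k) {
                std::make_heap(heap.begin(), heap.end(), std::greater<float>());
            }
        } else if (candidate > heap.front()) {
            // stage 2: pop the least element to the back(), replace it with the new
            // candidate, then push it back into the heap
            std::pop_heap(heap.begin(), heap.end(), std::greater<float>());
            heap.back() = candidate;
            std::push_heap(heap.begin(), heap.end(), std::greater<float>());
        }
    }
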


@@ -1,4 +1,4 @@
-#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
+#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnings on Windows
#include "ggml.h"
#include <cmath>


@@ -117,7 +117,7 @@ static void usage(char * argv[]) {
printf(" --size SIZE set test size, divisible by 32 (L1_SIZE:%d)\n", L1_SIZE);
printf(" -3 use size as L1, L2, L3 sizes (L1:%d L2:%d L3:%d)\n", L1_SIZE, L2_SIZE, L3_SIZE);
printf(" -4 use size as L1, L2, L3, MEM sizes (L1:%d L2:%d L3:%d MEM:%d)\n", L1_SIZE, L2_SIZE, L3_SIZE, MEM_SIZE);
-printf(" --op OP set test opration as quantize_row_q_reference, quantize_row_q, dequantize_row_q,\n");
+printf(" --op OP set test operation as quantize_row_q_reference, quantize_row_q, dequantize_row_q,\n");
printf(" quantize_row_q_dot, vec_dot_q (all)\n");
printf(" --type TYPE set test type as");
for (int i = 0; i < GGML_TYPE_COUNT; i++) {
@@ -202,7 +202,7 @@ int main(int argc, char * argv[]) {
}
int alignment = std::stoi(argv[i]);
if (alignment < 0 || alignment > MAX_ALIGNMENT) {
-fprintf(stderr, "error: aligment-offset must be less than %d\n", MAX_ALIGNMENT);
+fprintf(stderr, "error: alignment-offset must be less than %d\n", MAX_ALIGNMENT);
invalid_param = true;
break;
}