From 5aaf4a8aa6801f1a85fa8f9e05fc196067d86806 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Wed, 11 Dec 2024 12:35:47 +0200
Subject: [PATCH] compute hann window

---
 examples/tts/convert_pt_to_hf.py |  5 +----
 examples/tts/tts.cpp             | 15 +++++++++++++++
 gguf-py/gguf/constants.py        |  3 ---
 gguf-py/gguf/tensor_mapping.py   |  4 ----
 include/llama.h                  |  3 ---
 src/llama.cpp                    | 27 +++++++++++----------------
 6 files changed, 27 insertions(+), 30 deletions(-)

diff --git a/examples/tts/convert_pt_to_hf.py b/examples/tts/convert_pt_to_hf.py
index d06624879..c4a1185a8 100644
--- a/examples/tts/convert_pt_to_hf.py
+++ b/examples/tts/convert_pt_to_hf.py
@@ -70,7 +70,7 @@ def flatten_state_dict(state_dict, parent_key='', sep='.'):
         # keep only what we need for inference
         if not key.startswith('state_dict.feature_extractor.encodec.quantizer.') and \
            not key.startswith('state_dict.backbone.') and \
-           not key.startswith('state_dict.head.'):
+           not key.startswith('state_dict.head.out'):
             print('Skipping key: ', key)
             continue
 
@@ -101,9 +101,6 @@ def flatten_state_dict(state_dict, parent_key='', sep='.'):
         if new_key.endswith("gamma"):
             new_key = new_key.replace("gamma", "gamma.weight")
 
-        if new_key == "head.istft.window":
-            new_key = "head.istft.window.weight"
-
         size_mb = value.element_size() * value.nelement() / (1024 * 1024)
         print(f"{size_mb:8.2f} MB - {new_key}: {value.shape}")
 
diff --git a/examples/tts/tts.cpp b/examples/tts/tts.cpp
index 684f6b2fb..f402ba8a2 100644
--- a/examples/tts/tts.cpp
+++ b/examples/tts/tts.cpp
@@ -57,6 +57,16 @@ static void print_usage(int, char ** argv) {
     LOG("\n");
 }
 
+void fill_hann_window(int length, bool periodic, float * output) {
+    int offset = -1;
+    if (periodic) {
+        offset = 0;
+    }
+    for (int i = 0; i < length; i++) {
+        output[i] = 0.5 * (1.0 - cosf((2.0 * M_PI * i) / (length + offset)));
+    }
+}
+
 int main(int argc, char ** argv) {
     common_params params;
 
@@ -171,6 +181,11 @@ int main(int argc, char ** argv) {
     const int n_embd = llama_n_embd(model_cts);
     const float * embd = llama_get_embeddings(ctx_cts);
 
+    const int w = 1280;
+    std::vector<float> hann(w);
+    fill_hann_window(hann.size(), true, hann.data());
+
+    int n = n_embd*261;
 
     LOG("result:\n");
 
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index ea74354a4..f1f44c7d2 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -387,7 +387,6 @@ class MODEL_TENSOR(IntEnum):
     POS_NET_ATTN_K   = auto()
     POS_NET_ATTN_V   = auto()
     POS_NET_ATTN_OUT = auto()
-    HANN_WINDOW      = auto()
 
 
 MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
@@ -569,7 +568,6 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
    MODEL_TENSOR.POS_NET_ATTN_K:   "pos_net.{bid}.attn_k",
    MODEL_TENSOR.POS_NET_ATTN_V:   "pos_net.{bid}.attn_v",
    MODEL_TENSOR.POS_NET_ATTN_OUT: "pos_net.{bid}.attn_output",
-   MODEL_TENSOR.HANN_WINDOW:      "hann_window",
 }
 
 MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
@@ -1429,7 +1427,6 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.POS_NET_ATTN_K,
         MODEL_TENSOR.POS_NET_ATTN_V,
         MODEL_TENSOR.POS_NET_ATTN_OUT,
-        MODEL_TENSOR.HANN_WINDOW,
     ],
     # TODO
 }
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index 93b70a147..5bf1f514a 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -94,10 +94,6 @@ class TensorNameMap:
         MODEL_TENSOR.ROPE_FACTORS_LONG:  (),
         MODEL_TENSOR.ROPE_FACTORS_SHORT: (),
 
-        MODEL_TENSOR.HANN_WINDOW: (
-            "head.istft.window", # outetts
-        ),
-
         MODEL_TENSOR.CONV1D: (
             "backbone.embed", # roberta
         ),
diff --git a/include/llama.h b/include/llama.h
index efbb27d21..a4abf395b 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -482,9 +482,6 @@ extern "C" {
     // Returns the total number of parameters in the model
     LLAMA_API uint64_t llama_model_n_params(const struct llama_model * model);
 
-    // Get a llama model tensor
-    LLAMA_API struct ggml_tensor * llama_get_model_tensor(struct llama_model * model, const char * name);
-
     // Returns true if the model contains an encoder that requires llama_encode() call
     LLAMA_API bool llama_model_has_encoder(const struct llama_model * model);
 
diff --git a/src/llama.cpp b/src/llama.cpp
index 536ac1df6..2638c89f2 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -627,7 +627,6 @@ enum llm_tensor {
     LLM_TENSOR_POS_NET_ATTN_K,
     LLM_TENSOR_POS_NET_ATTN_V,
     LLM_TENSOR_POS_NET_ATTN_OUT,
-    LLM_TENSOR_HANN_WINDOW,
 };
 
 static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
@@ -1635,7 +1634,6 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_POS_NET_ATTN_K,   "pos_net.%d.attn_k" },
             { LLM_TENSOR_POS_NET_ATTN_V,   "pos_net.%d.attn_v" },
             { LLM_TENSOR_POS_NET_ATTN_OUT, "pos_net.%d.attn_output" },
-            { LLM_TENSOR_HANN_WINDOW,      "hann_window" },
         },
     },
     {
@@ -3648,6 +3646,17 @@ static int llama_get_device_count(const llama_model & model) {
     return (int) model.devices.size();
 }
 
+static struct ggml_tensor * llama_get_model_tensor(const struct llama_model * model, const char * name) {
+    auto it = std::find_if(model->tensors_by_name.begin(), model->tensors_by_name.end(),
+            [name](const std::pair<std::string, struct ggml_tensor *> & it) {
+                return it.first == name;
+            });
+    if (it == model->tensors_by_name.end()) {
+        return nullptr;
+    }
+    return it->second;
+}
+
 template<typename F>
 static bool buft_supported(ggml_backend_buffer_type_t buft, ggml_backend_dev_t dev, F & fn) {
     ggml_init_params params = {
@@ -7462,7 +7471,6 @@ static const std::map<llm_tensor, llm_tensor_info> llm_tensor_info_mapping = {
     {LLM_TENSOR_CONV_NEXT_PW1,   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_CONV_NEXT_PW2,   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_CONV_NEXT_GAMMA, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
-    {LLM_TENSOR_HANN_WINDOW,     {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
 };
 
 // checks if the weight tensor can be used with the specified buffer type and device
@@ -9638,8 +9646,6 @@ static bool llm_load_tensors(
 
                     model.output   = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {768, n_embd}, 0);
                     model.output_b = create_tensor(tn(LLM_TENSOR_OUTPUT, "bias"),   {n_embd}, 0);
-
-                    model.hann_window = create_tensor(tn(LLM_TENSOR_HANN_WINDOW, "weight"), {1280}, 0);
                 } break;
             default:
                 throw std::runtime_error("unknown architecture");
@@ -21021,17 +21027,6 @@ uint64_t llama_model_n_params(const struct llama_model * model) {
     return model->n_elements;
 }
 
-struct ggml_tensor * llama_get_model_tensor(struct llama_model * model, const char * name) {
-    auto it = std::find_if(model->tensors_by_name.begin(), model->tensors_by_name.end(),
-            [name](const std::pair<std::string, struct ggml_tensor *> & it) {
-                return it.first == name;
-            });
-    if (it == model->tensors_by_name.end()) {
-        return nullptr;
-    }
-    return it->second;
-}
-
 bool llama_model_has_encoder(const struct llama_model * model) {
     switch (model->arch) {
         case LLM_ARCH_T5: return true;
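
Note on the window computation: the removed head.istft.window tensor held a 1280-point
periodic Hann window, w[n] = 0.5 * (1 - cos(2*pi*n / N)) with N = length for the periodic
variant (N = length - 1 for the symmetric one), which is why the patch can recompute it at
runtime instead of shipping it in the GGUF. The standalone sketch below is illustrative
only and not part of the patch: it duplicates the fill_hann_window() added to
examples/tts/tts.cpp and prints two easy-to-check samples. The main() harness and the
expected values are assumptions; periodic == true is assumed to match the
torch.hann_window(1280) default that presumably produced the original checkpoint tensor.

    // sanity-check sketch for fill_hann_window() -- not part of the patch
    #define _USE_MATH_DEFINES
    #include <cmath>
    #include <cstdio>
    #include <vector>

    // copied from the tts.cpp hunk above
    void fill_hann_window(int length, bool periodic, float * output) {
        int offset = -1;
        if (periodic) {
            offset = 0;
        }
        for (int i = 0; i < length; i++) {
            output[i] = 0.5 * (1.0 - cosf((2.0 * M_PI * i) / (length + offset)));
        }
    }

    int main() {
        const int w = 1280; // same size as the old head.istft.window tensor
        std::vector<float> hann(w);
        fill_hann_window(hann.size(), true, hann.data());

        // periodic Hann window: starts at 0, peaks at 1 in the middle
        printf("hann[0]    = %f\n", hann[0]);        // expected 0.000000
        printf("hann[%4d] = %f\n", w/2, hann[w/2]);  // expected 1.000000
        return 0;
    }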