From a41e722ea675dedd9d14bdeff44bb7eb22c309a2 Mon Sep 17 00:00:00 2001
From: zhangkaihuo
Date: Sat, 14 Sep 2024 20:26:10 +0800
Subject: [PATCH] add lora

---
 convert_hf_to_gguf.py           |  5 +++--
 convert_lora_to_gguf.py         | 18 ++++++++++++------
 examples/llava/minicpmv-cli.cpp | 21 +++++++++++++++++++++
 3 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 41063d94b684e..599a6c2475c12 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -462,7 +462,7 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
         toktypes: list[int] = []
 
         from transformers import AutoTokenizer
-        tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
+        tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
         vocab_size = self.hparams.get("vocab_size", len(tokenizer.vocab))
         assert max(tokenizer.vocab.values()) < vocab_size
 
@@ -512,6 +512,7 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         # NOTE: if you get an error here, you need to update the convert_hf_to_gguf_update.py script
         # or pull the latest version of the model from Huggingface
         # don't edit the hashes manually!
+        res = "llama-bpe"
         if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5":
             # ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
             res = "llama-bpe"
@@ -596,7 +597,7 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "bc01ce58980e1db43859146dc51b1758b3b88729b217a74792e9f8d43e479d21":
             # ref: https://huggingface.co/TurkuNLP/gpt3-finnish-small
             res = "gpt3-finnish"
-
+        print("=============== res = ", res)
         if res is None:
             logger.warning("\n")
             logger.warning("**************************************************************************************")
diff --git a/convert_lora_to_gguf.py b/convert_lora_to_gguf.py
index a88d0d4a978a9..bd2a307bd2e80 100755
--- a/convert_lora_to_gguf.py
+++ b/convert_lora_to_gguf.py
@@ -222,9 +222,9 @@ def __torch_function__(cls, func: Callable, types, args=(), kwargs=None):
 
 
 def get_base_tensor_name(lora_tensor_name: str) -> str:
-    base_name = lora_tensor_name.replace("base_model.model.", "")
-    base_name = base_name.replace(".lora_A.weight", ".weight")
-    base_name = base_name.replace(".lora_B.weight", ".weight")
+    base_name = lora_tensor_name.replace("base_model.model.llm.", "")
+    base_name = base_name.replace(".lora_A.default.weight", ".weight")
+    base_name = base_name.replace(".lora_B.default.weight", ".weight")
     return base_name
 
 
@@ -338,8 +338,10 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
                     if self.lazy:
                         tensor = LazyTorchTensor.from_eager(tensor)
                     base_name = get_base_tensor_name(name)
-                    is_lora_a = ".lora_A.weight" in name
-                    is_lora_b = ".lora_B.weight" in name
+                    is_lora_a = ".lora_A.default.weight" in name
+                    is_lora_b = ".lora_B.default.weight" in name
+                    print(base_name, tensor, is_lora_a, is_lora_b)
+                    assert tensor is not None
                     if not is_lora_a and not is_lora_b:
                         if ".base_layer.weight" in name:
                             continue
@@ -351,13 +353,17 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
                             tensor_map[base_name].A = tensor
                         else:
                             tensor_map[base_name].B = tensor
+                            assert tensor is not None
+
                     else:
                         if is_lora_a:
                             tensor_map[base_name] = PartialLoraTensor(A=tensor)
                         else:
                             tensor_map[base_name] = PartialLoraTensor(B=tensor)
-
+                            assert tensor is not None
+                print()
                 for name, tensor in tensor_map.items():
+                    print(name, tensor)
                     assert tensor.A is not None
                     assert tensor.B is not None
                     yield (name, cast(torch.Tensor, LoraTorchTensor(tensor.A, tensor.B)))
diff --git a/examples/llava/minicpmv-cli.cpp b/examples/llava/minicpmv-cli.cpp
index c39853c3ff84d..cec907b28e207 100644
--- a/examples/llava/minicpmv-cli.cpp
+++ b/examples/llava/minicpmv-cli.cpp
@@ -211,6 +211,27 @@ static struct llava_context * llava_init_context(gpt_params * params) {
         return NULL;
     }
 
+    llama_init_result iparams;
+
+    // load and optionally apply lora adapters
+    for (auto & la : params->lora_adapters) {
+        llama_lora_adapter_container loaded_la;
+        loaded_la.path = la.path;
+        loaded_la.scale = la.scale;
+        loaded_la.adapter = llama_lora_adapter_init(model, la.path.c_str());
+        if (loaded_la.adapter == nullptr) {
+            fprintf(stderr, "%s: error: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
+            // llama_free(lctx);
+            // llama_free_model(model);
+            // return iparams;
+            return NULL;
+        }
+        iparams.lora_adapters.push_back(loaded_la); // copy to list of loaded adapters
+    }
+    if (!params->lora_init_without_apply) {
+        llama_lora_adapters_apply(ctx_llama, iparams.lora_adapters);
+    }
+
     auto ctx_llava = (struct llava_context *)malloc(sizeof(llava_context));
 
     ctx_llava->ctx_llama = ctx_llama;
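
For reference, a minimal sketch of what the renamed tensor-name mapping in convert_lora_to_gguf.py resolves to; the PEFT key below is a hypothetical MiniCPM-V example (assuming the adapter wraps the language model under the llm. prefix), not a name taken from the patch:

    def get_base_tensor_name(lora_tensor_name: str) -> str:
        # strip the PEFT wrapper prefix and the ".default" adapter suffix
        base_name = lora_tensor_name.replace("base_model.model.llm.", "")
        base_name = base_name.replace(".lora_A.default.weight", ".weight")
        base_name = base_name.replace(".lora_B.default.weight", ".weight")
        return base_name

    # hypothetical PEFT tensor key from a MiniCPM-V LoRA checkpoint
    key = "base_model.model.llm.model.layers.0.self_attn.q_proj.lora_A.default.weight"
    print(get_base_tensor_name(key))  # -> model.layers.0.self_attn.q_proj.weight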