
Commit c36434a
only add skip layers
zkh2016 committed Sep 19, 2024
1 parent 165ecd3 commit c36434a
Showing 3 changed files with 12 additions and 7 deletions.
1 change: 1 addition & 0 deletions examples/llava/minicpmv-cli.cpp
@@ -81,6 +81,7 @@ static struct llava_context * llava_init_context(gpt_params * params) {
     //load last model
     llama_model_params model_params = llama_model_params_from_gpt_params(*params);
     model_params.init_time = false;
+    model_params.has_vocab = false;
     //llama_model * model2 = llama_load_model_from_file(params->model.c_str(), model_params);
     //llama_model * model2 = llama_load_model_from_file("/Users/zkh/Downloads/last_16/ggml-model-Q4_0.gguf", model_params);
     model2 = llama_load_model_from_file(params->skip_model.c_str(), model_params);
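For context, a minimal caller-side sketch of the pattern this hunk sets up: the secondary "skip layers" model is loaded with vocab handling switched off, since it reuses the tokenizer state of the primary model. Note that path_to_skip_model is a placeholder, and init_time, has_vocab, and skip_model are additions from this fork rather than upstream llama.cpp API.

// Sketch: load a secondary, vocab-less "skip layers" model next to the
// primary one. Assumes this fork's extended llama_model_params fields.
llama_model_params skip_params = llama_model_default_params();
skip_params.init_time = false; // timing was already initialized by the first load
skip_params.has_vocab = false; // the stripped model carries no usable vocabulary
llama_model * skip_model = llama_load_model_from_file(path_to_skip_model, skip_params);
if (skip_model == NULL) {
    fprintf(stderr, "failed to load skip-layers model\n");
}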
17 changes: 10 additions & 7 deletions llama.cpp
@@ -4123,7 +4123,8 @@ static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch);
 
 static void llm_load_vocab(
         llama_model_loader & ml,
-        llama_model & model) {
+        llama_model & model,
+        bool has_vocab) {
     auto & vocab = model.vocab;
 
     struct gguf_context * ctx = ml.meta;
@@ -4296,10 +4297,11 @@ static void llm_load_vocab(
     } else if (vocab.type == LLAMA_VOCAB_TYPE_WPM) {
         vocab.linefeed_id = vocab.special_pad_id;
     } else {
-        printf("====unknow vocab type\n");
-        // const std::vector<int> ids = llama_tokenize_internal(vocab, "\xC4\x8A", false); // U+010A
-        // GGML_ASSERT(!ids.empty() && "model vocab missing newline token");
-        // vocab.linefeed_id = ids[0];
+        if(has_vocab){
+            const std::vector<int> ids = llama_tokenize_internal(vocab, "\xC4\x8A", false); // U+010A
+            GGML_ASSERT(!ids.empty() && "model vocab missing newline token");
+            vocab.linefeed_id = ids[0];
+        }
     }
 
     // special tokens
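A note on the magic string above: "\xC4\x8A" is the UTF-8 encoding of U+010A (Ċ), the symbol that GPT-2-style byte-level BPE vocabularies use for the newline byte 0x0A, so tokenizing it recovers the vocab's linefeed token id. That only works when a real vocabulary was loaded, hence the new has_vocab guard. A standalone sketch of the same resolution step, with tokenize() standing in for llama_tokenize_internal():

#include <vector>

// Sketch: resolve the linefeed token id for a byte-level BPE vocab.
static int resolve_linefeed_id(const llama_vocab & vocab) {
    // "\xC4\x8A" is UTF-8 for U+010A (Ċ), the byte-level BPE spelling of '\n'.
    const std::vector<int> ids = tokenize(vocab, "\xC4\x8A", /*bos=*/false);
    return ids.empty() ? -1 : ids[0]; // -1: vocab is missing a newline token
}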
@@ -5930,12 +5932,12 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
         throw std::runtime_error("error loading model hyperparameters: " + std::string(e.what()));
     }
     try {
-        llm_load_vocab(ml, model);
+        llm_load_vocab(ml, model, params.has_vocab);
     } catch(const std::exception & e) {
         throw std::runtime_error("error loading model vocabulary: " + std::string(e.what()));
     }
 
-    if (model.skip_layers == 0){
+    if (params.has_vocab){
         llm_load_print_meta(ml, model);
     }
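The metadata print is likewise re-gated: instead of the fork's earlier model.skip_layers == 0 check, llm_load_print_meta (which reports vocab statistics) now runs exactly when a vocabulary was loaded. Roughly, the load path behaves like this simplified sketch (signatures condensed, error handling elided):

// Sketch of the load path after this commit (heavily simplified).
static void load_sketch(llama_model_loader & ml, llama_model & model,
                        const llama_model_params & params) {
    llm_load_vocab(ml, model, params.has_vocab); // skips linefeed lookup if false
    if (params.has_vocab) {
        llm_load_print_meta(ml, model);          // meta print reads vocab stats
    }
}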

@@ -14861,6 +14863,7 @@ struct llama_model_params llama_model_default_params() {
         /*.use_mmap  =*/ true,
         /*.use_mlock =*/ false,
         /*.init_time =*/ true,
+        /*.has_vocab =*/ true,
     };
 
 #ifdef GGML_USE_METAL
1 change: 1 addition & 0 deletions llama.h
@@ -236,6 +236,7 @@ extern "C" {
         bool use_mmap;  // use mmap if possible
         bool use_mlock; // force system to keep model in RAM
         bool init_time;
+        bool has_vocab;
     };
 
     struct llama_context_params {
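Since llama_model_default_params() initializes the field to true (see the llama.cpp hunk above), existing API consumers are unaffected; only callers that knowingly load a vocab-less model opt out. A minimal C-style usage sketch, with "model.gguf" as a placeholder path:

// Sketch: existing callers keep working; opting out is explicit.
struct llama_model_params mparams = llama_model_default_params();
mparams.has_vocab = false; // this model file carries no usable vocabulary
struct llama_model * mdl = llama_load_model_from_file("model.gguf", mparams);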
