Skip to content

Commit

Permalink
models without vocabulary, llama.cpp part
Browse files Browse the repository at this point in the history
  • Loading branch information
Xarbirus committed Feb 28, 2024
1 parent af0b073 commit 2beaa1f
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 37 deletions.
93 changes: 59 additions & 34 deletions llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2985,10 +2985,11 @@ static const char * llama_model_type_name(e_model type) {

static const char * llama_model_vocab_type_name(enum llama_vocab_type type){
switch (type) {
case LLAMA_VOCAB_TYPE_SPM: return "SPM";
case LLAMA_VOCAB_TYPE_BPE: return "BPE";
case LLAMA_VOCAB_TYPE_WPM: return "WPM";
default: return "unknown";
case LLAMA_VOCAB_TYPE_SPM: return "SPM";
case LLAMA_VOCAB_TYPE_BPE: return "BPE";
case LLAMA_VOCAB_TYPE_WPM: return "WPM";
case LLAMA_VOCAB_TYPE_NO_VOCAB: return "no vocab";
default: return "unknown";
}
}

Expand Down Expand Up @@ -3019,15 +3020,14 @@ static void llm_load_hparams(
// get general kv
ml.get_key(LLM_KV_GENERAL_NAME, model.name, false);

// get hparams kv
ml.get_arr_n(LLM_KV_TOKENIZER_LIST, hparams.n_vocab);
ml.get_key (LLM_KV_CONTEXT_LENGTH, hparams.n_ctx_train);
ml.get_key (LLM_KV_EMBEDDING_LENGTH, hparams.n_embd);
ml.get_key (LLM_KV_FEED_FORWARD_LENGTH, hparams.n_ff);
ml.get_key (LLM_KV_ATTENTION_HEAD_COUNT, hparams.n_head);
ml.get_key (LLM_KV_BLOCK_COUNT, hparams.n_layer);
ml.get_key (LLM_KV_EXPERT_COUNT, hparams.n_expert, false);
ml.get_key (LLM_KV_EXPERT_USED_COUNT, hparams.n_expert_used, false);
ml.get_key(LLM_KV_VOCAB_SIZE, hparams.n_vocab, false) || ml.get_arr_n(LLM_KV_TOKENIZER_LIST, hparams.n_vocab);
ml.get_key(LLM_KV_CONTEXT_LENGTH, hparams.n_ctx_train);
ml.get_key(LLM_KV_EMBEDDING_LENGTH, hparams.n_embd);
ml.get_key(LLM_KV_FEED_FORWARD_LENGTH, hparams.n_ff);
ml.get_key(LLM_KV_ATTENTION_HEAD_COUNT, hparams.n_head);
ml.get_key(LLM_KV_BLOCK_COUNT, hparams.n_layer);
ml.get_key(LLM_KV_EXPERT_COUNT, hparams.n_expert, false);
ml.get_key(LLM_KV_EXPERT_USED_COUNT, hparams.n_expert_used, false);

GGML_ASSERT(hparams.n_expert <= LLAMA_MAX_EXPERTS);
GGML_ASSERT(hparams.n_expert_used <= hparams.n_expert);
Expand Down Expand Up @@ -3348,30 +3348,25 @@ static void llm_load_vocab(

const auto kv = LLM_KV(model.arch);

const int token_idx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_LIST).c_str());
if (token_idx == -1) {
throw std::runtime_error("cannot find tokenizer vocab in model file\n");
}

const float * scores = nullptr;
const int score_idx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_SCORES).c_str());
if (score_idx != -1) {
scores = (const float * ) gguf_get_arr_data(ctx, score_idx);
}

const int * toktypes = nullptr;
const int toktype_idx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_TOKEN_TYPE).c_str());
if (toktype_idx != -1) {
toktypes = (const int * ) gguf_get_arr_data(ctx, toktype_idx);
}

// determine vocab type
{
std::string tokenizer_name;

ml.get_key(LLM_KV_TOKENIZER_MODEL, tokenizer_name);

if (tokenizer_name == "llama") {
if (tokenizer_name == "no_vocab") {
vocab.type = LLAMA_VOCAB_TYPE_NO_VOCAB;

// default special tokens
vocab.special_bos_id = -1;
vocab.special_eos_id = -1;
vocab.special_unk_id = -1;
vocab.special_sep_id = -1;
vocab.special_pad_id = -1;
vocab.linefeed_id = -1;

return;
} else if (tokenizer_name == "llama") {
vocab.type = LLAMA_VOCAB_TYPE_SPM;

// default special tokens
Expand Down Expand Up @@ -3437,6 +3432,23 @@ static void llm_load_vocab(
}
}

const int token_idx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_LIST).c_str());
if (token_idx == -1) {
throw std::runtime_error("cannot find tokenizer vocab in model file\n");
}

const float * scores = nullptr;
const int score_idx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_SCORES).c_str());
if (score_idx != -1) {
scores = (const float * ) gguf_get_arr_data(ctx, score_idx);
}

const int * toktypes = nullptr;
const int toktype_idx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_TOKEN_TYPE).c_str());
if (toktype_idx != -1) {
toktypes = (const int * ) gguf_get_arr_data(ctx, toktype_idx);
}

const uint32_t n_vocab = gguf_get_arr_n(ctx, token_idx);

vocab.id_to_token.resize(n_vocab);
Expand Down Expand Up @@ -4613,7 +4625,8 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam

llm_load_print_meta(ml, model);

if (model.hparams.n_vocab != model.vocab.id_to_token.size()) {
if (model.vocab.type != LLAMA_VOCAB_TYPE_NO_VOCAB &&
model.hparams.n_vocab != model.vocab.id_to_token.size()) {
throw std::runtime_error("vocab size mismatch");
}

Expand Down Expand Up @@ -8363,26 +8376,32 @@ static enum llama_vocab_type llama_vocab_get_type(const llama_vocab & vocab) {
}

static bool llama_is_normal_token(const llama_vocab & vocab, llama_token id) {
GGML_ASSERT(vocab.type != LLAMA_VOCAB_TYPE_NO_VOCAB);
return vocab.id_to_token[id].type == LLAMA_TOKEN_TYPE_NORMAL;
}

static bool llama_is_unknown_token(const llama_vocab & vocab, llama_token id) {
GGML_ASSERT(vocab.type != LLAMA_VOCAB_TYPE_NO_VOCAB);
return vocab.id_to_token[id].type == LLAMA_TOKEN_TYPE_UNKNOWN;
}

static bool llama_is_control_token(const llama_vocab & vocab, llama_token id) {
GGML_ASSERT(vocab.type != LLAMA_VOCAB_TYPE_NO_VOCAB);
return vocab.id_to_token[id].type == LLAMA_TOKEN_TYPE_CONTROL;
}

static bool llama_is_byte_token(const llama_vocab & vocab, llama_token id) {
GGML_ASSERT(vocab.type != LLAMA_VOCAB_TYPE_NO_VOCAB);
return vocab.id_to_token[id].type == LLAMA_TOKEN_TYPE_BYTE;
}

static bool llama_is_user_defined_token(const llama_vocab& vocab, llama_token id) {
GGML_ASSERT(vocab.type != LLAMA_VOCAB_TYPE_NO_VOCAB);
return vocab.id_to_token[id].type == LLAMA_TOKEN_TYPE_USER_DEFINED;
}

static uint8_t llama_token_to_byte(const llama_vocab& vocab, llama_token id) {
GGML_ASSERT(llama_vocab_get_type(vocab) != LLAMA_VOCAB_TYPE_NO_VOCAB);
GGML_ASSERT(llama_is_byte_token(vocab, id));
const auto& token_data = vocab.id_to_token.at(id);
switch (llama_vocab_get_type(vocab)) {
Expand All @@ -8403,6 +8422,7 @@ static uint8_t llama_token_to_byte(const llama_vocab& vocab, llama_token id) {
}

static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch) {
GGML_ASSERT(llama_vocab_get_type(vocab) != LLAMA_VOCAB_TYPE_NO_VOCAB);
static const char * hex = "0123456789ABCDEF";
switch (llama_vocab_get_type(vocab)) {
case LLAMA_VOCAB_TYPE_SPM: {
Expand Down Expand Up @@ -9246,6 +9266,8 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
}
}
} break;
case LLAMA_VOCAB_TYPE_NO_VOCAB:
GGML_ASSERT(false);
}

return output;
Expand Down Expand Up @@ -12850,8 +12872,8 @@ void llama_set_n_threads(struct llama_context * ctx, uint32_t n_threads, uint32_
}

void llama_get_n_threads(struct llama_context * ctx, uint32_t * n_threads, uint32_t * n_threads_batch) {
assert(n_threads);
assert(n_threads_batch);
GGML_ASSERT(n_threads);
GGML_ASSERT(n_threads_batch);
*n_threads = ctx->cparams.n_threads;
*n_threads_batch = ctx->cparams.n_threads_batch;
}
Expand Down Expand Up @@ -12940,14 +12962,17 @@ float * llama_get_embeddings_ith(struct llama_context * ctx, int32_t i) {
}

const char * llama_token_get_text(const struct llama_model * model, llama_token token) {
GGML_ASSERT(model->vocab.type != LLAMA_VOCAB_TYPE_NO_VOCAB);
return model->vocab.id_to_token[token].text.c_str();
}

float llama_token_get_score(const struct llama_model * model, llama_token token) {
GGML_ASSERT(model->vocab.type != LLAMA_VOCAB_TYPE_NO_VOCAB);
return model->vocab.id_to_token[token].score;
}

llama_token_type llama_token_get_type(const struct llama_model * model, llama_token token) {
GGML_ASSERT(model->vocab.type != LLAMA_VOCAB_TYPE_NO_VOCAB);
return model->vocab.id_to_token[token].type;
}

Expand Down
7 changes: 4 additions & 3 deletions llama.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,10 @@ extern "C" {
typedef int32_t llama_seq_id;

enum llama_vocab_type {
LLAMA_VOCAB_TYPE_SPM = 0, // SentencePiece
LLAMA_VOCAB_TYPE_BPE = 1, // Byte Pair Encoding
LLAMA_VOCAB_TYPE_WPM = 2, // WordPiece
LLAMA_VOCAB_TYPE_SPM = 0, // SentencePiece
LLAMA_VOCAB_TYPE_BPE = 1, // Byte Pair Encoding
LLAMA_VOCAB_TYPE_WPM = 2, // WordPiece
LLAMA_VOCAB_TYPE_NO_VOCAB = 3, // For models without vocab
};

// note: these values should be synchronized with ggml_rope
Expand Down

0 comments on commit 2beaa1f

Please sign in to comment.