Merge commit 'ad3a0505e3b6cd777259ee35e61d428357ffc565' into concedo_experimental

# Conflicts:
#	.github/workflows/build.yml
#	.github/workflows/close-issue.yml
#	.github/workflows/code-coverage.yml
#	.github/workflows/docker.yml
#	.github/workflows/editorconfig.yml
#	.github/workflows/nix-ci-aarch64.yml
#	.github/workflows/nix-ci.yml
#	.github/workflows/python-check-requirements.yml
#	.github/workflows/python-lint.yml
#	.github/workflows/server.yml
#	.github/workflows/zig-build.yml
#	.gitignore
#	CMakeLists.txt
#	Makefile
#	README-sycl.md
#	README.md
#	build.zig
#	common/CMakeLists.txt
#	llama.cpp
#	tests/CMakeLists.txt
#	tests/test-backend-ops.cpp
LostRuins committed Apr 6, 2024
2 parents c348223 + ad3a050 commit 9c0fbf9
Showing 67 changed files with 10,823 additions and 4,623 deletions.
317 changes: 258 additions & 59 deletions common/common.cpp

Large diffs are not rendered by default.

41 changes: 27 additions & 14 deletions common/common.h
@@ -104,18 +104,22 @@ struct gpt_params {
     // sampling parameters
     struct llama_sampling_params sparams;
 
-    std::string model             = "models/7B/ggml-model-f16.gguf"; // model path
-    std::string model_url         = ""; // model url to download
-    std::string model_draft       = ""; // draft model for speculative decoding
-    std::string model_alias       = "unknown"; // model alias
-    std::string prompt            = "";
-    std::string prompt_file       = ""; // store the external prompt file name
-    std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state
-    std::string input_prefix      = ""; // string to prefix user inputs with
-    std::string input_suffix      = ""; // string to suffix user inputs with
+    std::string model                = "models/7B/ggml-model-f16.gguf"; // model path
+    std::string model_draft          = ""; // draft model for speculative decoding
+    std::string model_alias          = "unknown"; // model alias
+    std::string model_url            = ""; // model url to download
+    std::string hf_repo              = ""; // HF repo
+    std::string hf_file              = ""; // HF file
+    std::string prompt               = "";
+    std::string prompt_file          = ""; // store the external prompt file name
+    std::string path_prompt_cache    = ""; // path to file for saving/loading prompt eval state
+    std::string input_prefix         = ""; // string to prefix user inputs with
+    std::string input_suffix         = ""; // string to suffix user inputs with
     std::vector<std::string> antiprompt; // string upon seeing which more user input is prompted
-    std::string logdir            = ""; // directory in which to save YAML log files
-    std::string logits_file       = ""; // file for saving *all* logits
+    std::string logdir               = ""; // directory in which to save YAML log files
+    std::string lookup_cache_static  = ""; // path of static ngram cache file for lookup decoding
+    std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding
+    std::string logits_file          = ""; // file for saving *all* logits
 
     std::vector<llama_model_kv_override> kv_overrides;
 
@@ -155,7 +159,7 @@ struct gpt_params {
     bool interactive_first = false; // wait for user input immediately
     bool multiline_input   = false; // reverse the usage of `\`
     bool simple_io         = false; // improves compatibility with subprocesses and limited consoles
-    bool cont_batching     = false; // insert new sequences for decoding on-the-fly
+    bool cont_batching     = true;  // insert new sequences for decoding on-the-fly
 
     bool input_prefix_bos  = false; // prefix BOS to user inputs, preceding input_prefix
     bool ignore_eos        = false; // ignore generated EOS tokens
@@ -183,6 +187,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
 
 void gpt_print_usage(int argc, char ** argv, const gpt_params & params);
 
+bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_params & params, int & i, bool & invalid_param);
+
 std::string get_system_info(const gpt_params & params);
 
 std::string gpt_random_prompt(std::mt19937 & rng);
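
The new gpt_params_find_arg hook exposes the common flag parser one argument at a time, so other front ends can reuse it. A rough sketch of a driving loop, under the assumption that the function returns true when it recognizes the flag, advances `i` past any consumed value, and sets `invalid_param` on a bad or missing value (the loop and the parse_all helper are illustrative, not the actual gpt_params_parse body):

    // Illustrative caller loop; parse_all is a hypothetical helper.
    #include "common.h"
    #include <cstdio>
    #include <string>

    static bool parse_all(int argc, char ** argv, gpt_params & params) {
        bool invalid_param = false;
        for (int i = 1; i < argc; i++) {
            const std::string arg = argv[i];
            // Offer each flag to the shared parser; it consumes any value itself.
            if (!gpt_params_find_arg(argc, argv, arg, params, i, invalid_param)) {
                fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
                return false;
            }
            if (invalid_param) {
                fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str());
                return false;
            }
        }
        return true;
    }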
@@ -208,8 +214,8 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params
 struct llama_model_params   llama_model_params_from_gpt_params (const gpt_params & params);
 struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params);
 
-struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model,
-                                               struct llama_model_params params);
+struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model, const struct llama_model_params & params);
+struct llama_model * llama_load_model_from_hf(const char * repo, const char * file, const char * path_model, const struct llama_model_params & params);
 
 // Batch utils
 
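Taken together with the new hf_repo/hf_file fields above, these declarations let a caller fetch a model straight from a URL or a Hugging Face repo. A minimal sketch, assuming the binary was built with libcurl support (otherwise both loaders log an error and return NULL); the repo and file names are placeholders, not anything this commit prescribes:

    #include "common.h"
    #include "llama.h"

    int main() {
        llama_backend_init();

        struct llama_model_params mparams = llama_model_default_params();

        // Download the GGUF file (or reuse a previously downloaded copy) and load it.
        struct llama_model * model = llama_load_model_from_hf(
            "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF", // repo (placeholder)
            "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",   // file (placeholder)
            "models/tinyllama.gguf",                  // local path to save to
            mparams);
        if (model == NULL) {
            return 1;
        }

        llama_free_model(model);
        llama_backend_free();
        return 0;
    }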
@@ -318,3 +324,10 @@ struct llama_control_vector_load_info {
 // Load control vectors, scale each by strength, and add them together.
 // On error, returns {-1, empty}
 llama_control_vector_data llama_control_vector_load(const std::vector<llama_control_vector_load_info> & load_infos);
+
+//
+// Split utils
+//
+static const char * const LLM_KV_SPLIT_NO_STR            = "split.no";
+static const char * const LLM_KV_SPLIT_COUNT_STR         = "split.count";
+static const char * const LLM_KV_SPLIT_TENSORS_COUNT_STR = "split.tensors.count";
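
These split.* keys mirror the GGUF metadata written when a model is sharded across several files. A hedged sketch of reading them back through the public gguf API; the helper itself is hypothetical, and the value type (u16 for split.no and split.count) is an assumption about how the writer stored them:

    #include "common.h"
    #include "ggml.h"
    #include <cstdio>

    // Hypothetical helper: report which shard of a split model a file is.
    static bool print_split_info(const char * fname) {
        struct gguf_init_params iparams = { /*.no_alloc =*/ true, /*.ctx =*/ NULL };
        struct gguf_context * ctx = gguf_init_from_file(fname, iparams);
        if (ctx == NULL) {
            return false;
        }

        const int i_no    = gguf_find_key(ctx, LLM_KV_SPLIT_NO_STR);
        const int i_count = gguf_find_key(ctx, LLM_KV_SPLIT_COUNT_STR);

        if (i_no >= 0 && i_count >= 0) {
            // split.no is zero-based; print it one-based for humans.
            printf("%s: shard %d of %d\n", fname,
                   gguf_get_val_u16(ctx, i_no) + 1,
                   (int) gguf_get_val_u16(ctx, i_count));
        } else {
            printf("%s: not a split file\n", fname);
        }

        gguf_free(ctx);
        return true;
    }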