#include <common-base.h>
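Every member below is a plain field with the documented default, so the usual pattern is to default-construct a gpt_params and override only the fields a given program needs. A minimal sketch (the model path is hypothetical, and exact member availability depends on the llama.cpp revision this page was generated from):

    #include <common-base.h>

    int main() {
        gpt_params params;                             // every field starts at the default documented below
        params.model        = "models/7b-q4_k_m.gguf"; // hypothetical local path
        params.n_ctx        = 4096;                    // 0 would keep the model's training context
        params.n_gpu_layers = 99;                      // -1 keeps the build default
        return 0;
    }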
◆ antiprompt
std::vector<std::string> gpt_params::antiprompt
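In interactive use the antiprompt strings act as stop sequences: when the model emits one of them, generation halts and control returns to the user. A short sketch:

    gpt_params params;
    // hand control back whenever the model emits either string
    params.antiprompt = { "User:", "###" };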
◆ api_keys
std::vector<std::string> gpt_params::api_keys
◆ attention_type
enum llama_attention_type gpt_params::attention_type = LLAMA_ATTENTION_TYPE_UNSPECIFIED
◆ cache_type_k
std::string gpt_params::cache_type_k = "f16"
◆ cache_type_v
std::string gpt_params::cache_type_v = "f16"
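cache_type_k and cache_type_v name the element types of the K and V caches; quantized types such as "q8_0" trade a little accuracy for a much smaller cache than the default "f16". A sketch; note that in builds of this era a non-f16 V cache generally also requires flash attention:

    gpt_params params;
    // shrink the KV cache; a quantized V cache usually needs flash_attn enabled
    params.cache_type_k = "q8_0";
    params.cache_type_v = "q8_0";
    params.flash_attn   = true;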
◆ cb_eval
ggml_backend_sched_eval_callback gpt_params::cb_eval = nullptr
◆ cb_eval_user_data
void* gpt_params::cb_eval_user_data = nullptr
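cb_eval is ggml's scheduler eval callback: it is first called with ask == true to find out whether a tensor is of interest, then again with ask == false once the tensor has been computed; cb_eval_user_data is passed through unchanged. A minimal sketch, assuming this era's signature bool (*)(struct ggml_tensor * t, bool ask, void * user_data):

    static bool debug_eval_cb(struct ggml_tensor * t, bool ask, void * user_data) {
        (void) user_data;
        if (ask) {
            return true; // yes, observe this tensor once it has been computed
        }
        // t->name and t->data are valid here: log, dump, or inspect as needed
        return true;     // true lets evaluation proceed
    }

    gpt_params params;
    params.cb_eval           = debug_eval_cb;
    params.cb_eval_user_data = nullptr;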
◆ chat_template
std::string gpt_params::chat_template = ""
◆ check_tensors
bool gpt_params::check_tensors = false
◆ chunk_separator
std::string gpt_params::chunk_separator = "\n"
◆ chunk_size
int32_t gpt_params::chunk_size = 64
◆ compute_ppl
bool gpt_params::compute_ppl = true
◆ cont_batching
bool gpt_params::cont_batching = true
◆ context_files
std::vector<std::string> gpt_params::context_files
◆ control_vector_layer_end
int32_t gpt_params::control_vector_layer_end = -1
◆ control_vector_layer_start
int32_t gpt_params::control_vector_layer_start = -1
◆ control_vectors
std::vector<llama_control_vector_load_info> gpt_params::control_vectors
◆ conversation
bool gpt_params::conversation = false
◆ cvector_dimre_method
dimre_method gpt_params::cvector_dimre_method = DIMRE_METHOD_PCA
◆ cvector_negative_file
std::string gpt_params::cvector_negative_file = "examples/cvector-generator/negative.txt"
◆ cvector_outfile
std::string gpt_params::cvector_outfile = "control_vector.gguf"
◆ cvector_positive_file
std::string gpt_params::cvector_positive_file = "examples/cvector-generator/positive.txt"
◆ defrag_thold
float gpt_params::defrag_thold = -1.0f
◆ display_prompt
bool gpt_params::display_prompt = true
◆ dump_kv_cache
bool gpt_params::dump_kv_cache = false
◆ embd_normalize
int32_t gpt_params::embd_normalize = 2
◆ embd_out
std::string gpt_params::embd_out = ""
◆ embd_sep
std::string gpt_params::embd_sep = "\n"
◆ embedding
bool gpt_params::embedding = false
◆ enable_chat_template
bool gpt_params::enable_chat_template = true
◆ endpoint_metrics
bool gpt_params::endpoint_metrics = false
◆ endpoint_slots
bool gpt_params::endpoint_slots = true
◆ escape
bool gpt_params::escape = true
◆ flash_attn
bool gpt_params::flash_attn = false
◆ grp_attn_n
int32_t gpt_params::grp_attn_n = 1
◆ grp_attn_w
int32_t gpt_params::grp_attn_w = 512
◆ hellaswag
bool gpt_params::hellaswag = false
◆ hellaswag_tasks
size_t gpt_params::hellaswag_tasks = 400
◆ hf_file
std::string gpt_params::hf_file = ""
◆ hf_repo
std::string gpt_params::hf_repo = ""
◆ hf_token
std::string gpt_params::hf_token = ""
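hf_repo, hf_file, and hf_token let the common loader fetch a GGUF file from Hugging Face instead of a local path; the token is only needed for gated or private repositories. A sketch with hypothetical names:

    gpt_params params;
    params.hf_repo = "some-org/some-model-GGUF"; // hypothetical repository id
    params.hf_file = "some-model.Q4_K_M.gguf";   // hypothetical file inside the repo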
◆ hostname
std::string gpt_params::hostname = "127.0.0.1"
◆ i_chunk
int32_t gpt_params::i_chunk = 0
◆ i_pos
int32_t gpt_params::i_pos = -1
◆ ignore_eos
bool gpt_params::ignore_eos = false
◆ image
std::vector<std::string> gpt_params::image
◆ in_files
std::vector<std::string> gpt_params::in_files
◆ infill
bool gpt_params::infill = false
◆ input_prefix
std::string gpt_params::input_prefix = ""
◆ input_prefix_bos
bool gpt_params::input_prefix_bos = false
◆ input_suffix
std::string gpt_params::input_suffix = ""
◆ interactive
bool gpt_params::interactive = false
◆ interactive_first
bool gpt_params::interactive_first = false
◆ is_pp_shared
bool gpt_params::is_pp_shared = false
◆ kl_divergence
bool gpt_params::kl_divergence = false
◆ kv_overrides
std::vector<llama_model_kv_override> gpt_params::kv_overrides
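kv_overrides holds GGUF metadata overrides applied at model load time. A hedged sketch, assuming this era's llama_model_kv_override layout (a tag enum, a fixed-size key buffer, and a value union):

    #include <cstring>

    gpt_params params;
    llama_model_kv_override kvo;
    std::strncpy(kvo.key, "some.metadata.key", sizeof(kvo.key) - 1); // hypothetical key
    kvo.key[sizeof(kvo.key) - 1] = '\0';
    kvo.tag     = LLAMA_KV_OVERRIDE_TYPE_INT;
    kvo.val_i64 = 8;
    params.kv_overrides.push_back(kvo);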
◆ log_json
bool gpt_params::log_json = false
◆ logdir
std::string gpt_params::logdir = ""
◆ logits_all
bool gpt_params::logits_all = false
◆ logits_file
std::string gpt_params::logits_file = ""
◆ lookup_cache_dynamic
std::string gpt_params::lookup_cache_dynamic = ""
◆ lookup_cache_static
std::string gpt_params::lookup_cache_static = ""
◆ lora_adapter
std::vector<std::tuple<std::string, float> > gpt_params::lora_adapter
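Each lora_adapter entry pairs an adapter path with a scale factor, and multiple adapters can be stacked. A sketch with a hypothetical path:

    gpt_params params;
    // apply a LoRA at 80% strength
    params.lora_adapter.emplace_back("loras/style.gguf", 0.8f);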
◆ lora_outfile
std::string gpt_params::lora_outfile = "ggml-lora-merged-f16.gguf"
◆ main_gpu
int32_t gpt_params::main_gpu = 0
◆ mmproj
std::string gpt_params::mmproj = ""
◆ model
std::string gpt_params::model = ""
◆ model_alias
std::string gpt_params::model_alias = "unknown"
◆ model_draft
std::string gpt_params::model_draft = ""
◆ model_url
std::string gpt_params::model_url = ""
◆ multiline_input
bool gpt_params::multiline_input = false
◆ multiple_choice
bool gpt_params::multiple_choice = false
◆ multiple_choice_tasks
size_t gpt_params::multiple_choice_tasks = 0
◆ n_batch
int32_t gpt_params::n_batch = 2048
◆ n_chunks
int32_t gpt_params::n_chunks = -1
◆ n_ctx
int32_t gpt_params::n_ctx = 0
◆ n_draft
int32_t gpt_params::n_draft = 5
◆ n_gpu_layers
int32_t gpt_params::n_gpu_layers = -1
◆ n_gpu_layers_draft
int32_t gpt_params::n_gpu_layers_draft = -1
◆ n_junk
int32_t gpt_params::n_junk = 250
◆ n_keep
int32_t gpt_params::n_keep = 0
◆ n_out_freq
int32_t gpt_params::n_out_freq = 10
◆ n_parallel
int32_t gpt_params::n_parallel = 1
◆ n_pca_batch
int gpt_params::n_pca_batch = 100
◆ n_pca_iterations
int gpt_params::n_pca_iterations = 1000
◆ n_pl
std::vector<int32_t> gpt_params::n_pl
◆ n_pp
std::vector<int32_t> gpt_params::n_pp
◆ n_predict
int32_t gpt_params::n_predict = -1
◆ n_print
int32_t gpt_params::n_print = -1
◆ n_save_freq
int32_t gpt_params::n_save_freq = 0
◆ n_sequences
int32_t gpt_params::n_sequences = 1
◆ n_tg
std::vector<int32_t> gpt_params::n_tg
◆ n_threads
int32_t gpt_params::n_threads = cpu_get_num_math()
◆ n_threads_batch
int32_t gpt_params::n_threads_batch = -1
◆ n_threads_batch_draft
int32_t gpt_params::n_threads_batch_draft = -1
◆ n_threads_draft
int32_t gpt_params::n_threads_draft = -1
◆ n_threads_http
int32_t gpt_params::n_threads_http = -1
◆ n_ubatch
int32_t gpt_params::n_ubatch = 512
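n_batch is the logical batch size (the most tokens submitted in one decode call) while n_ubatch is the physical micro-batch actually processed at a time, so n_ubatch should not exceed n_batch. A sketch:

    gpt_params params;
    params.n_batch  = 4096; // accept up to 4096 tokens per decode call
    params.n_ubatch = 1024; // but process them 1024 at a time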
◆ no_kv_offload
bool gpt_params::no_kv_offload = false
◆ numa
ggml_numa_strategy gpt_params::numa = GGML_NUMA_STRATEGY_DISABLED
◆ out_file
std::string gpt_params::out_file = "imatrix.dat"
◆ p_split
float gpt_params::p_split = 0.1f
◆ path_prompt_cache
std::string gpt_params::path_prompt_cache = ""
◆ pooling_type
enum llama_pooling_type gpt_params::pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED
◆ port
int32_t gpt_params::port = 8080
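hostname and port (together with the ssl_file_* and endpoint_* members) configure the bundled HTTP server example. A sketch that binds to all interfaces:

    gpt_params params;
    params.hostname = "0.0.0.0"; // default binds loopback only (127.0.0.1)
    params.port     = 8081;      // default is 8080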
◆ ppl_output_type
int32_t gpt_params::ppl_output_type = 0
◆ ppl_stride
int32_t gpt_params::ppl_stride = 0
◆ process_output
bool gpt_params::process_output = false
◆ prompt
std::string gpt_params::prompt = ""
◆ prompt_cache_all
bool gpt_params::prompt_cache_all = false
◆ prompt_cache_ro
bool gpt_params::prompt_cache_ro = false
◆ prompt_file
std::string gpt_params::prompt_file = ""
◆ public_path
std::string gpt_params::public_path = ""
◆ rope_freq_base
float gpt_params::rope_freq_base = 0.0f
◆ rope_freq_scale
float gpt_params::rope_freq_scale = 0.0f
◆ rope_scaling_type
enum llama_rope_scaling_type gpt_params::rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED
◆ rpc_servers
std::string gpt_params::rpc_servers = ""
◆ seed
uint32_t gpt_params::seed = LLAMA_DEFAULT_SEED
◆ simple_io
bool gpt_params::simple_io = false
◆ slot_prompt_similarity
float gpt_params::slot_prompt_similarity = 0.5f
◆ slot_save_path
std::string gpt_params::slot_save_path
◆ sparams
llama_sampling_params gpt_params::sparams
◆ special
bool gpt_params::special = false
◆ split_mode
enum llama_split_mode gpt_params::split_mode = LLAMA_SPLIT_MODE_LAYER
◆ spm_infill
bool gpt_params::spm_infill = false
◆ ssl_file_cert
std::string gpt_params::ssl_file_cert = ""
◆ ssl_file_key
std::string gpt_params::ssl_file_key = ""
◆ system_prompt
std::string gpt_params::system_prompt = ""
◆ tensor_split
float gpt_params::tensor_split[128] = { 0 }
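tensor_split assigns each device its proportion of the model when work is split across GPUs; when every entry is left at zero the backend's default split applies. A sketch for two devices:

    gpt_params params;
    // put roughly 60% of the work on device 0 and 40% on device 1
    params.tensor_split[0] = 0.6f;
    params.tensor_split[1] = 0.4f;
    params.split_mode      = LLAMA_SPLIT_MODE_LAYER; // already the default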
◆ timeout_read
int32_t gpt_params::timeout_read = 600
◆ timeout_write
int32_t gpt_params::timeout_write = 600
◆ usage
bool gpt_params::usage = false
◆ use_color
bool gpt_params::use_color = false
◆ use_mlock
bool gpt_params::use_mlock = false
◆ use_mmap
bool gpt_params::use_mmap = true
◆ verbose_prompt
bool gpt_params::verbose_prompt = false
◆ verbosity
int32_t gpt_params::verbosity = 0
◆ warmup
bool gpt_params::warmup = true
◆ winogrande
bool gpt_params::winogrande = false
◆ winogrande_tasks
size_t gpt_params::winogrande_tasks = 0
◆ yarn_attn_factor
float gpt_params::yarn_attn_factor = 1.0f
◆ yarn_beta_fast
float gpt_params::yarn_beta_fast = 32.0f
◆ yarn_beta_slow
float gpt_params::yarn_beta_slow = 1.0f
◆ yarn_ext_factor
float gpt_params::yarn_ext_factor = -1.0f
◆ yarn_orig_ctx
int32_t gpt_params::yarn_orig_ctx = 0
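The rope_* and yarn_* members control RoPE-based context extension. A hedged sketch of a long-context setup, assuming a base model trained at 4096 tokens and this era's YaRN scaling path:

    gpt_params params;
    params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_YARN;
    params.yarn_orig_ctx     = 4096;  // original training context (0 = take it from the model)
    params.n_ctx             = 16384; // 4x extension target
    // yarn_attn_factor, yarn_beta_fast, yarn_beta_slow keep their defaults here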
The documentation for this struct was generated from the following file:
common-base.h