#include <common-base.h>
◆ antiprompt
| std::vector<std::string> gpt_params::antiprompt |
◆ api_keys
| std::vector<std::string> gpt_params::api_keys |
◆ attention_type
| enum llama_attention_type gpt_params::attention_type = LLAMA_ATTENTION_TYPE_UNSPECIFIED |
◆ cache_type_k
| std::string gpt_params::cache_type_k = "f16" |
◆ cache_type_v
| std::string gpt_params::cache_type_v = "f16" |
◆ cb_eval
| ggml_backend_sched_eval_callback gpt_params::cb_eval = nullptr |
◆ cb_eval_user_data
| void* gpt_params::cb_eval_user_data = nullptr |
◆ chat_template
| std::string gpt_params::chat_template = "" |
◆ check_tensors
| bool gpt_params::check_tensors = false |
◆ chunk_separator
| std::string gpt_params::chunk_separator = "\n" |
◆ chunk_size
| int32_t gpt_params::chunk_size = 64 |
◆ compute_ppl
| bool gpt_params::compute_ppl = true |
◆ cont_batching
| bool gpt_params::cont_batching = true |
◆ context_files
| std::vector<std::string> gpt_params::context_files |
◆ control_vector_layer_end
| int32_t gpt_params::control_vector_layer_end = -1 |
◆ control_vector_layer_start
| int32_t gpt_params::control_vector_layer_start = -1 |
◆ control_vectors
| std::vector<llama_control_vector_load_info> gpt_params::control_vectors |
◆ conversation
| bool gpt_params::conversation = false |
◆ cvector_dimre_method
| dimre_method gpt_params::cvector_dimre_method = DIMRE_METHOD_PCA |
◆ cvector_negative_file
| std::string gpt_params::cvector_negative_file = "examples/cvector-generator/negative.txt" |
◆ cvector_outfile
| std::string gpt_params::cvector_outfile = "control_vector.gguf" |
◆ cvector_positive_file
| std::string gpt_params::cvector_positive_file = "examples/cvector-generator/positive.txt" |
◆ defrag_thold
| float gpt_params::defrag_thold = -1.0f |
◆ display_prompt
| bool gpt_params::display_prompt = true |
◆ dump_kv_cache
| bool gpt_params::dump_kv_cache = false |
◆ embd_normalize
| int32_t gpt_params::embd_normalize = 2 |
◆ embd_out
| std::string gpt_params::embd_out = "" |
◆ embd_sep
| std::string gpt_params::embd_sep = "\n" |
◆ embedding
| bool gpt_params::embedding = false |
◆ enable_chat_template
| bool gpt_params::enable_chat_template = true |
◆ endpoint_metrics
| bool gpt_params::endpoint_metrics = false |
◆ endpoint_slots
| bool gpt_params::endpoint_slots = true |
◆ escape
| bool gpt_params::escape = true |
◆ flash_attn
| bool gpt_params::flash_attn = false |
◆ grp_attn_n
| int32_t gpt_params::grp_attn_n = 1 |
◆ grp_attn_w
| int32_t gpt_params::grp_attn_w = 512 |
◆ hellaswag
| bool gpt_params::hellaswag = false |
◆ hellaswag_tasks
| size_t gpt_params::hellaswag_tasks = 400 |
◆ hf_file
| std::string gpt_params::hf_file = "" |
◆ hf_repo
| std::string gpt_params::hf_repo = "" |
◆ hf_token
| std::string gpt_params::hf_token = "" |
◆ hostname
| std::string gpt_params::hostname = "127.0.0.1" |
◆ i_chunk
| int32_t gpt_params::i_chunk = 0 |
◆ i_pos
| int32_t gpt_params::i_pos = -1 |
◆ ignore_eos
| bool gpt_params::ignore_eos = false |
◆ image
| std::vector<std::string> gpt_params::image |
◆ in_files
| std::vector<std::string> gpt_params::in_files |
◆ infill
| bool gpt_params::infill = false |
◆ input_prefix
| std::string gpt_params::input_prefix = "" |
◆ input_prefix_bos
| bool gpt_params::input_prefix_bos = false |
◆ input_suffix
| std::string gpt_params::input_suffix = "" |
◆ interactive
| bool gpt_params::interactive = false |
◆ interactive_first
| bool gpt_params::interactive_first = false |
◆ is_pp_shared
| bool gpt_params::is_pp_shared = false |
◆ kl_divergence
| bool gpt_params::kl_divergence = false |
◆ kv_overrides
| std::vector<llama_model_kv_override> gpt_params::kv_overrides |
◆ log_json
| bool gpt_params::log_json = false |
◆ logdir
| std::string gpt_params::logdir = "" |
◆ logits_all
| bool gpt_params::logits_all = false |
◆ logits_file
| std::string gpt_params::logits_file = "" |
◆ lookup_cache_dynamic
| std::string gpt_params::lookup_cache_dynamic = "" |
◆ lookup_cache_static
| std::string gpt_params::lookup_cache_static = "" |
◆ lora_adapter
| std::vector<std::tuple<std::string, float> > gpt_params::lora_adapter |
◆ lora_outfile
| std::string gpt_params::lora_outfile = "ggml-lora-merged-f16.gguf" |
◆ main_gpu
| int32_t gpt_params::main_gpu = 0 |
◆ mmproj
| std::string gpt_params::mmproj = "" |
◆ model
| std::string gpt_params::model = "" |
◆ model_alias
| std::string gpt_params::model_alias = "unknown" |
◆ model_draft
| std::string gpt_params::model_draft = "" |
◆ model_url
| std::string gpt_params::model_url = "" |
◆ multiline_input
| bool gpt_params::multiline_input = false |
◆ multiple_choice
| bool gpt_params::multiple_choice = false |
◆ multiple_choice_tasks
| size_t gpt_params::multiple_choice_tasks = 0 |
◆ n_batch
| int32_t gpt_params::n_batch = 2048 |
◆ n_chunks
| int32_t gpt_params::n_chunks = -1 |
◆ n_ctx
| int32_t gpt_params::n_ctx = 0 |
◆ n_draft
| int32_t gpt_params::n_draft = 5 |
◆ n_gpu_layers
| int32_t gpt_params::n_gpu_layers = -1 |
◆ n_gpu_layers_draft
| int32_t gpt_params::n_gpu_layers_draft = -1 |
◆ n_junk
| int32_t gpt_params::n_junk = 250 |
◆ n_keep
| int32_t gpt_params::n_keep = 0 |
◆ n_out_freq
| int32_t gpt_params::n_out_freq = 10 |
◆ n_parallel
| int32_t gpt_params::n_parallel = 1 |
◆ n_pca_batch
| int gpt_params::n_pca_batch = 100 |
◆ n_pca_iterations
| int gpt_params::n_pca_iterations = 1000 |
◆ n_pl
| std::vector<int32_t> gpt_params::n_pl |
◆ n_pp
| std::vector<int32_t> gpt_params::n_pp |
◆ n_predict
| int32_t gpt_params::n_predict = -1 |
◆ n_print
| int32_t gpt_params::n_print = -1 |
◆ n_save_freq
| int32_t gpt_params::n_save_freq = 0 |
◆ n_sequences
| int32_t gpt_params::n_sequences = 1 |
◆ n_tg
| std::vector<int32_t> gpt_params::n_tg |
◆ n_threads
| int32_t gpt_params::n_threads = cpu_get_num_math() |
◆ n_threads_batch
| int32_t gpt_params::n_threads_batch = -1 |
◆ n_threads_batch_draft
| int32_t gpt_params::n_threads_batch_draft = -1 |
◆ n_threads_draft
| int32_t gpt_params::n_threads_draft = -1 |
◆ n_threads_http
| int32_t gpt_params::n_threads_http = -1 |
◆ n_ubatch
| int32_t gpt_params::n_ubatch = 512 |
◆ no_kv_offload
| bool gpt_params::no_kv_offload = false |
◆ numa
| ggml_numa_strategy gpt_params::numa = GGML_NUMA_STRATEGY_DISABLED |
◆ out_file
| std::string gpt_params::out_file = "imatrix.dat" |
◆ p_split
| float gpt_params::p_split = 0.1f |
◆ path_prompt_cache
| std::string gpt_params::path_prompt_cache = "" |
◆ pooling_type
| enum llama_pooling_type gpt_params::pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED |
◆ port
| int32_t gpt_params::port = 8080 |
◆ ppl_output_type
| int32_t gpt_params::ppl_output_type = 0 |
◆ ppl_stride
| int32_t gpt_params::ppl_stride = 0 |
◆ process_output
| bool gpt_params::process_output = false |
◆ prompt
| std::string gpt_params::prompt = "" |
◆ prompt_cache_all
| bool gpt_params::prompt_cache_all = false |
◆ prompt_cache_ro
| bool gpt_params::prompt_cache_ro = false |
◆ prompt_file
| std::string gpt_params::prompt_file = "" |
◆ public_path
| std::string gpt_params::public_path = "" |
◆ rope_freq_base
| float gpt_params::rope_freq_base = 0.0f |
◆ rope_freq_scale
| float gpt_params::rope_freq_scale = 0.0f |
◆ rope_scaling_type
| enum llama_rope_scaling_type gpt_params::rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED |
◆ rpc_servers
| std::string gpt_params::rpc_servers = "" |
◆ seed
| uint32_t gpt_params::seed = LLAMA_DEFAULT_SEED |
◆ simple_io
| bool gpt_params::simple_io = false |
◆ slot_prompt_similarity
| float gpt_params::slot_prompt_similarity = 0.5f |
◆ slot_save_path
| std::string gpt_params::slot_save_path |
◆ sparams
| llama_sampling_params gpt_params::sparams |
◆ special
| bool gpt_params::special = false |
◆ split_mode
| enum llama_split_mode gpt_params::split_mode = LLAMA_SPLIT_MODE_LAYER |
◆ spm_infill
| bool gpt_params::spm_infill = false |
◆ ssl_file_cert
| std::string gpt_params::ssl_file_cert = "" |
◆ ssl_file_key
| std::string gpt_params::ssl_file_key = "" |
◆ system_prompt
| std::string gpt_params::system_prompt = "" |
◆ tensor_split
| float gpt_params::tensor_split[128] = { 0 } |
◆ timeout_read
| int32_t gpt_params::timeout_read = 600 |
◆ timeout_write
| int32_t gpt_params::timeout_write = 600 |
◆ usage
| bool gpt_params::usage = false |
◆ use_color
| bool gpt_params::use_color = false |
◆ use_mlock
| bool gpt_params::use_mlock = false |
◆ use_mmap
| bool gpt_params::use_mmap = true |
◆ verbose_prompt
| bool gpt_params::verbose_prompt = false |
◆ verbosity
| int32_t gpt_params::verbosity = 0 |
◆ warmup
| bool gpt_params::warmup = true |
◆ winogrande
| bool gpt_params::winogrande = false |
◆ winogrande_tasks
| size_t gpt_params::winogrande_tasks = 0 |
◆ yarn_attn_factor
| float gpt_params::yarn_attn_factor = 1.0f |
◆ yarn_beta_fast
| float gpt_params::yarn_beta_fast = 32.0f |
◆ yarn_beta_slow
| float gpt_params::yarn_beta_slow = 1.0f |
◆ yarn_ext_factor
| float gpt_params::yarn_ext_factor = -1.0f |
◆ yarn_orig_ctx
| int32_t gpt_params::yarn_orig_ctx = 0 |
The documentation for this struct was generated from the following file: common-base.h