Llama.cpp C++-to-C# wrapper from testedlines.com: C++ docs 1.0.1
The Llama.cpp C++-to-C# wrapper is a minor extension of the Llama.cpp tag b3490 codebase, lightly modified by testedlines so that it can be compiled for, and called from, the Styled Lines C# Unity Asset Store package.
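
Because C# can only reach this code across a C ABI, the wrapper exposes plain-C entry points around the C++ types. The sketch below is a hypothetical illustration of that pattern; the exported names and signatures are assumptions for illustration, not the actual Styled Lines interface.

    // Hypothetical C-ABI boundary sketch; names here are illustrative assumptions.
    #include <common-base.h>

    extern "C" {

    // A C# caller (e.g. via [DllImport]) invokes this plain-C entry point;
    // the C++ types stay hidden behind the boundary.
    void * wrapper_create_params(const char * model_path) {
        auto * params = new gpt_params();
        params->model = model_path;  // gpt_params::model is a std::string
        return params;               // opaque handle for the managed side
    }

    void wrapper_free_params(void * handle) {
        delete static_cast<gpt_params *>(handle);
    }

    } // extern "C"
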
gpt_params Struct Reference

#include <common-base.h>

Data Fields

std::vector< std::string > antiprompt
std::vector< std::string > api_keys
enum llama_attention_type attention_type = LLAMA_ATTENTION_TYPE_UNSPECIFIED
std::string cache_type_k = "f16"
std::string cache_type_v = "f16"
ggml_backend_sched_eval_callback cb_eval = nullptr
void * cb_eval_user_data = nullptr
std::string chat_template = ""
bool check_tensors = false
std::string chunk_separator = "\n"
int32_t chunk_size = 64
bool compute_ppl = true
bool cont_batching = true
std::vector< std::string > context_files
int32_t control_vector_layer_end = -1
int32_t control_vector_layer_start = -1
std::vector< llama_control_vector_load_info > control_vectors
bool conversation = false
dimre_method cvector_dimre_method = DIMRE_METHOD_PCA
std::string cvector_negative_file = "examples/cvector-generator/negative.txt"
std::string cvector_outfile = "control_vector.gguf"
std::string cvector_positive_file = "examples/cvector-generator/positive.txt"
float defrag_thold = -1.0f
bool display_prompt = true
bool dump_kv_cache = false
int32_t embd_normalize = 2
std::string embd_out = ""
std::string embd_sep = "\n"
bool embedding = false
bool enable_chat_template = true
bool endpoint_metrics = false
bool endpoint_slots = true
bool escape = true
bool flash_attn = false
int32_t grp_attn_n = 1
int32_t grp_attn_w = 512
bool hellaswag = false
size_t hellaswag_tasks = 400
std::string hf_file = ""
std::string hf_repo = ""
std::string hf_token = ""
std::string hostname = "127.0.0.1"
int32_t i_chunk = 0
int32_t i_pos = -1
bool ignore_eos = false
std::vector< std::string > image
std::vector< std::string > in_files
bool infill = false
std::string input_prefix = ""
bool input_prefix_bos = false
std::string input_suffix = ""
bool interactive = false
bool interactive_first = false
bool is_pp_shared = false
bool kl_divergence = false
std::vector< llama_model_kv_override > kv_overrides
bool log_json = false
std::string logdir = ""
bool logits_all = false
std::string logits_file = ""
std::string lookup_cache_dynamic = ""
std::string lookup_cache_static = ""
std::vector< std::tuple< std::string, float > > lora_adapter
std::string lora_outfile = "ggml-lora-merged-f16.gguf"
int32_t main_gpu = 0
std::string mmproj = ""
std::string model = ""
std::string model_alias = "unknown"
std::string model_draft = ""
std::string model_url = ""
bool multiline_input = false
bool multiple_choice = false
size_t multiple_choice_tasks = 0
int32_t n_batch = 2048
int32_t n_chunks = -1
int32_t n_ctx = 0
int32_t n_draft = 5
int32_t n_gpu_layers = -1
int32_t n_gpu_layers_draft = -1
int32_t n_junk = 250
int32_t n_keep = 0
int32_t n_out_freq = 10
int32_t n_parallel = 1
int n_pca_batch = 100
int n_pca_iterations = 1000
std::vector< int32_t > n_pl
std::vector< int32_t > n_pp
int32_t n_predict = -1
int32_t n_print = -1
int32_t n_save_freq = 0
int32_t n_sequences = 1
std::vector< int32_t > n_tg
int32_t n_threads = cpu_get_num_math()
int32_t n_threads_batch = -1
int32_t n_threads_batch_draft = -1
int32_t n_threads_draft = -1
int32_t n_threads_http = -1
int32_t n_ubatch = 512
bool no_kv_offload = false
ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED
std::string out_file = "imatrix.dat"
float p_split = 0.1f
std::string path_prompt_cache = ""
enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED
int32_t port = 8080
int32_t ppl_output_type = 0
int32_t ppl_stride = 0
bool process_output = false
std::string prompt = ""
bool prompt_cache_all = false
bool prompt_cache_ro = false
std::string prompt_file = ""
std::string public_path = ""
float rope_freq_base = 0.0f
float rope_freq_scale = 0.0f
enum llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED
std::string rpc_servers = ""
uint32_t seed = LLAMA_DEFAULT_SEED
bool simple_io = false
float slot_prompt_similarity = 0.5f
std::string slot_save_path
struct llama_sampling_params sparams
bool special = false
enum llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER
bool spm_infill = false
std::string ssl_file_cert = ""
std::string ssl_file_key = ""
std::string system_prompt = ""
float tensor_split[128] = { 0 }
int32_t timeout_read = 600
int32_t timeout_write = timeout_read
bool usage = false
bool use_color = false
bool use_mlock = false
bool use_mmap = true
bool verbose_prompt = false
int32_t verbosity = 0
bool warmup = true
bool winogrande = false
size_t winogrande_tasks = 0
float yarn_attn_factor = 1.0f
float yarn_beta_fast = 32.0f
float yarn_beta_slow = 1.0f
float yarn_ext_factor = -1.0f
int32_t yarn_orig_ctx = 0
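
For orientation, here is a minimal usage sketch that populates a few of the fields above before a generation run. It relies only on the declarations in common-base.h; the model path and the chosen values are placeholder assumptions, and every field left untouched keeps the default shown in the list.

    #include <common-base.h>

    int main() {
        gpt_params params;  // every field starts at the defaults listed above

        params.model     = "models/example-q4.gguf"; // placeholder path, not a real model
        params.prompt    = "Hello";
        params.n_ctx     = 4096;   // 0 (the default) defers to the model's context size
        params.n_predict = 128;    // -1 (the default) generates until EOS
        params.seed      = 42;     // overrides LLAMA_DEFAULT_SEED
        params.use_mmap  = true;   // already the default; shown for clarity

        // Server-oriented settings live in the same struct:
        params.hostname = "127.0.0.1";
        params.port     = 8080;
        return 0;
    }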
 

The documentation for this struct was generated from the following file: common-base.h