Llama.cpp C++-to-C# wrapper from testedlines.com: C++ docs 1.0.1
The Llama.cpp C++-to-C# wrapper is a minor extension of the Llama.cpp tag b3490 codebase, lightly modified by testedlines so that it can be compiled for, and called from, the Styled Lines C# Unity Asset Store package.
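
Because C# can only reach this code across a C ABI, the wrapper exposes plain-C entry points around the C++ types. The sketch below is a hypothetical illustration of that pattern; the exported names and signatures are assumptions for illustration, not the actual Styled Lines interface.

    // Hypothetical C-ABI boundary sketch; names here are illustrative assumptions.
    #include <common-base.h>

    extern "C" {

    // A C# caller (e.g. via [DllImport]) invokes this plain-C entry point;
    // the C++ types stay hidden behind the boundary.
    void * wrapper_create_params(const char * model_path) {
        auto * params = new gpt_params();
        params->model = model_path;  // gpt_params::model is a std::string
        return params;               // opaque handle for the managed side
    }

    void wrapper_free_params(void * handle) {
        delete static_cast<gpt_params *>(handle);
    }

    } // extern "C"
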
gpt_params Struct Reference

#include <common-base.h>

Data Fields

std::vector< std::string > antiprompt
std::vector< std::string > api_keys
enum llama_attention_type attention_type = LLAMA_ATTENTION_TYPE_UNSPECIFIED
std::string cache_type_k = "f16"
std::string cache_type_v = "f16"
ggml_backend_sched_eval_callback cb_eval = nullptr
void * cb_eval_user_data = nullptr
std::string chat_template = ""
bool check_tensors = false
std::string chunk_separator = "\n"
int32_t chunk_size = 64
bool compute_ppl = true
bool cont_batching = true
std::vector< std::string > context_files
int32_t control_vector_layer_end = -1
int32_t control_vector_layer_start = -1
std::vector< llama_control_vector_load_info > control_vectors
bool conversation = false
dimre_method cvector_dimre_method = DIMRE_METHOD_PCA
std::string cvector_negative_file = "examples/cvector-generator/negative.txt"
std::string cvector_outfile = "control_vector.gguf"
std::string cvector_positive_file = "examples/cvector-generator/positive.txt"
float defrag_thold = -1.0f
bool display_prompt = true
bool dump_kv_cache = false
int32_t embd_normalize = 2
std::string embd_out = ""
std::string embd_sep = "\n"
bool embedding = false
bool enable_chat_template = true
bool endpoint_metrics = false
bool endpoint_slots = true
bool escape = true
bool flash_attn = false
int32_t grp_attn_n = 1
int32_t grp_attn_w = 512
bool hellaswag = false
size_t hellaswag_tasks = 400
std::string hf_file = ""
std::string hf_repo = ""
std::string hf_token = ""
std::string hostname = "127.0.0.1"
int32_t i_chunk = 0
int32_t i_pos = -1
bool ignore_eos = false
std::vector< std::string > image
std::vector< std::string > in_files
bool infill = false
std::string input_prefix = ""
bool input_prefix_bos = false
std::string input_suffix = ""
bool interactive = false
bool interactive_first = false
bool is_pp_shared = false
bool kl_divergence = false
std::vector< llama_model_kv_override > kv_overrides
bool log_json = false
std::string logdir = ""
bool logits_all = false
std::string logits_file = ""
std::string lookup_cache_dynamic = ""
std::string lookup_cache_static = ""
std::vector< std::tuple< std::string, float > > lora_adapter
std::string lora_outfile = "ggml-lora-merged-f16.gguf"
int32_t main_gpu = 0
std::string mmproj = ""
std::string model = ""
std::string model_alias = "unknown"
std::string model_draft = ""
std::string model_url = ""
bool multiline_input = false
bool multiple_choice = false
size_t multiple_choice_tasks = 0
int32_t n_batch = 2048
int32_t n_chunks = -1
int32_t n_ctx = 0
int32_t n_draft = 5
int32_t n_gpu_layers = -1
int32_t n_gpu_layers_draft = -1
int32_t n_junk = 250
int32_t n_keep = 0
int32_t n_out_freq = 10
int32_t n_parallel = 1
int n_pca_batch = 100
int n_pca_iterations = 1000
std::vector< int32_t > n_pl
std::vector< int32_t > n_pp
int32_t n_predict = -1
int32_t n_print = -1
int32_t n_save_freq = 0
int32_t n_sequences = 1
std::vector< int32_t > n_tg
int32_t n_threads = cpu_get_num_math()
int32_t n_threads_batch = -1
int32_t n_threads_batch_draft = -1
int32_t n_threads_draft = -1
int32_t n_threads_http = -1
int32_t n_ubatch = 512
bool no_kv_offload = false
ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED
std::string out_file = "imatrix.dat"
float p_split = 0.1f
std::string path_prompt_cache = ""
enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED
int32_t port = 8080
int32_t ppl_output_type = 0
int32_t ppl_stride = 0
bool process_output = false
std::string prompt = ""
bool prompt_cache_all = false
bool prompt_cache_ro = false
std::string prompt_file = ""
std::string public_path = ""
float rope_freq_base = 0.0f
float rope_freq_scale = 0.0f
enum llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED
std::string rpc_servers = ""
uint32_t seed = LLAMA_DEFAULT_SEED
bool simple_io = false
float slot_prompt_similarity = 0.5f
std::string slot_save_path
struct llama_sampling_params sparams
bool special = false
enum llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER
bool spm_infill = false
std::string ssl_file_cert = ""
std::string ssl_file_key = ""
std::string system_prompt = ""
float tensor_split[128] = { 0 }
int32_t timeout_read = 600
int32_t timeout_write = timeout_read
bool usage = false
bool use_color = false
bool use_mlock = false
bool use_mmap = true
bool verbose_prompt = false
int32_t verbosity = 0
bool warmup = true
bool winogrande = false
size_t winogrande_tasks = 0
float yarn_attn_factor = 1.0f
float yarn_beta_fast = 32.0f
float yarn_beta_slow = 1.0f
float yarn_ext_factor = -1.0f
int32_t yarn_orig_ctx = 0
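
For orientation, here is a minimal usage sketch that populates a few of the fields above before a generation run. It relies only on the declarations in common-base.h; the model path and the chosen values are placeholder assumptions, and every field left untouched keeps the default shown in the list.

    #include <common-base.h>

    int main() {
        gpt_params params;  // every field starts at the defaults listed above

        params.model     = "models/example-q4.gguf"; // placeholder path, not a real model
        params.prompt    = "Hello";
        params.n_ctx     = 4096;   // 0 (the default) defers to the model's context size
        params.n_predict = 128;    // -1 (the default) generates until EOS
        params.seed      = 42;     // overrides LLAMA_DEFAULT_SEED
        params.use_mmap  = true;   // already the default; shown for clarity

        // Server-oriented settings live in the same struct:
        params.hostname = "127.0.0.1";
        params.port     = 8080;
        return 0;
    }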
 

The documentation for this struct was generated from the following file: common-base.h