Llama.cpp C++-to-Csharp wrapper from testedlines.com: C++ docs 1.0.1
Llama.cpp C++-to-Csharp wrapper is a minor extension of the Llama.cpp codebase (tag b3490), lightly modified by testedlines so that it can be compiled for, and called from, the Styled Lines Csharp package on the Unity Asset Store.
Loading...
Searching...
No Matches
sampling-base.h
Go to the documentation of this file.
1#pragma once
2#include <random>
3#include <string>
4#include <unordered_map>
5#include <vector>
6
7// sampler types
8enum class llama_sampler_type : char { // scoped enum of sampler kinds; underlying type is char, each enumerator is a distinct character literal
9 TOP_K = 'k', // name matches the top_k parameter of llama_sampling_params
10 TOP_P = 'p', // name matches the top_p parameter
11 MIN_P = 'm', // name matches the min_p parameter
12 TFS_Z = 'f', // name matches the tfs_z parameter
13 TYPICAL_P = 'y', // name matches the typical_p parameter
14 TEMPERATURE = 't' // NOTE(review): presumably tied to the temp / dynatemp_* parameters - confirm against the sampler implementation
15}; // NOTE(review): the character values look like short codes for specifying a sampler order as text - confirm against the code that parses them
16
17// sampling parameters
18typedef struct llama_sampling_params {
19 int32_t n_prev = 64; // number of previous tokens to remember
20 int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
21 int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens
22 int32_t top_k = 40; // <= 0 to use vocab size
23 float top_p = 0.95f; // 1.0 = disabled
24 float min_p = 0.05f; // 0.0 = disabled
25 float tfs_z = 1.00f; // 1.0 = disabled
26 float typical_p = 1.00f; // 1.0 = disabled
27 float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
28 float dynatemp_range = 0.00f; // 0.0 = disabled
29 float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
30 int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
31 float penalty_repeat = 1.00f; // 1.0 = disabled
32 float penalty_freq = 0.00f; // 0.0 = disabled
33 float penalty_present = 0.00f; // 0.0 = disabled
34 int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
35 float mirostat_tau = 5.00f; // target entropy
36 float mirostat_eta = 0.10f; // learning rate
37 bool penalize_nl = false; // consider newlines as a repeatable token
38 uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampling_context
39
48
49 std::string grammar; // optional BNF-like grammar to constrain sampling
50
51 // Classifier-Free Guidance
52 // https://arxiv.org/abs/2306.17806
53 std::string cfg_negative_prompt; // string to help guidance
54 float cfg_scale = 1.f; // how strong is guidance
55
56 std::unordered_map<llama_token, float> logit_bias; // logit bias for specific tokens
57
58 std::vector<llama_token> penalty_prompt_tokens;
llama_sampler_type
Definition sampling-base.h:8
struct llama_sampling_params llama_sampling_params
Definition sampling-base.h:18
float penalty_repeat
Definition sampling-base.h:31
float penalty_present
Definition sampling-base.h:33
bool use_penalty_prompt_tokens
Definition sampling-base.h:59
float min_p
Definition sampling-base.h:24
int32_t penalty_last_n
Definition sampling-base.h:30
std::string cfg_negative_prompt
Definition sampling-base.h:53
float penalty_freq
Definition sampling-base.h:32
int32_t n_prev
Definition sampling-base.h:19
float top_p
Definition sampling-base.h:23
uint32_t seed
Definition sampling-base.h:38
float tfs_z
Definition sampling-base.h:25
std::vector< llama_token > penalty_prompt_tokens
Definition sampling-base.h:58
std::string grammar
Definition sampling-base.h:49
int32_t min_keep
Definition sampling-base.h:21
int32_t mirostat
Definition sampling-base.h:34
std::vector< llama_sampler_type > samplers_sequence
Definition sampling-base.h:40
std::unordered_map< llama_token, float > logit_bias
Definition sampling-base.h:56
float temp
Definition sampling-base.h:27
int32_t top_k
Definition sampling-base.h:22
float typical_p
Definition sampling-base.h:26
int32_t n_probs
Definition sampling-base.h:20
float dynatemp_range
Definition sampling-base.h:28
float mirostat_eta
Definition sampling-base.h:36
float mirostat_tau
Definition sampling-base.h:35
bool penalize_nl
Definition sampling-base.h:37
float cfg_scale
Definition sampling-base.h:54
float dynatemp_exponent
Definition sampling-base.h:29