Add epsilon and eta sampling #475

Merged 1 commit on Oct 14, 2023

69 changes: 63 additions & 6 deletions aiserver.py
@@ -1192,6 +1192,12 @@ def loadmodelsettings():
     if("rep_pen_range" in js):
         koboldai_vars.rep_pen_range = js["rep_pen_range"]
         koboldai_vars.default_preset['rep_pen_range'] = js["rep_pen_range"]
+    if("eps_cutoff" in js):
+        koboldai_vars.eps_cutoff = js["eps_cutoff"]
+        koboldai_vars.default_preset['eps_cutoff'] = js["eps_cutoff"]
+    if("eta_cutoff" in js):
+        koboldai_vars.eta_cutoff = js["eta_cutoff"]
+        koboldai_vars.default_preset['eta_cutoff'] = js["eta_cutoff"]
     if("adventure" in js):
         koboldai_vars.adventure = js["adventure"]
     if("chatmode" in js):
@@ -1275,6 +1281,10 @@ def processsettings(js):
         koboldai_vars.rep_pen_slope = js["rep_pen_slope"]
     if("rep_pen_range" in js):
         koboldai_vars.rep_pen_range = js["rep_pen_range"]
+    if("eps_cutoff" in js):
+        koboldai_vars.eps_cutoff = js["eps_cutoff"]
+    if("eta_cutoff" in js):
+        koboldai_vars.eta_cutoff = js["eta_cutoff"]
     if("genamt" in js):
         koboldai_vars.genamt = js["genamt"]
     if("max_length" in js):
@@ -2251,6 +2261,8 @@ def lua_has_setting(setting):
         "setreppen",
         "setreppenslope",
         "setreppenrange",
+        "seteps_cutoff",
+        "seteta_cutoff",
         "settknmax",
         "setwidepth",
         "setuseprompt",
@@ -2271,6 +2283,8 @@ def lua_has_setting(setting):
         "reppen",
         "reppenslope",
         "reppenrange",
+        "eps_cutoff",
+        "eta_cutoff",
         "tknmax",
         "widepth",
         "useprompt",
@@ -2309,6 +2323,8 @@ def lua_get_setting(setting):
     if(setting in ("setreppen", "reppen")): return koboldai_vars.rep_pen
     if(setting in ("setreppenslope", "reppenslope")): return koboldai_vars.rep_pen_slope
     if(setting in ("setreppenrange", "reppenrange")): return koboldai_vars.rep_pen_range
+    if(setting in ("seteps_cutoff", "eps_cutoff")): return koboldai_vars.eps_cutoff
+    if(setting in ("seteta_cutoff", "eta_cutoff")): return koboldai_vars.eta_cutoff
     if(setting in ("settknmax", "tknmax")): return koboldai_vars.max_length
     if(setting == "anotedepth"): return koboldai_vars.andepth
     if(setting in ("setwidepth", "widepth")): return koboldai_vars.widepth
@@ -2347,6 +2363,8 @@ def lua_set_setting(setting, v):
     if(setting in ("setreppen", "reppen")): koboldai_vars.rep_pen = v
     if(setting in ("setreppenslope", "reppenslope")): koboldai_vars.rep_pen_slope = v
     if(setting in ("setreppenrange", "reppenrange")): koboldai_vars.rep_pen_range = v
+    if(setting in ("seteps_cutoff", "eps_cutoff")): koboldai_vars.eps_cutoff = v
+    if(setting in ("seteta_cutoff", "eta_cutoff")): koboldai_vars.eta_cutoff = v
     if(setting in ("settknmax", "tknmax")): koboldai_vars.max_length = v; return True
     if(setting == "anotedepth"): koboldai_vars.andepth = v; return True
     if(setting in ("setwidepth", "widepth")): koboldai_vars.widepth = v; return True
@@ -2772,6 +2790,16 @@ def get_message(msg):
         emit('from_server', {'cmd': 'setlabelreppenrange', 'data': msg['data']}, broadcast=True, room="UI_1")
         settingschanged()
         refresh_settings()
+    elif(msg['cmd'] == 'seteps_cutoff'):
+        koboldai_vars.eps_cutoff = float(msg['data'])
+        emit('from_server', {'cmd': 'setlabeleps_cutoff', 'data': msg['data']}, broadcast=True, room="UI_1")
+        settingschanged()
+        refresh_settings()
+    elif(msg['cmd'] == 'seteta_cutoff'):
+        koboldai_vars.eta_cutoff = float(msg['data'])
+        emit('from_server', {'cmd': 'setlabeleta_cutoff', 'data': msg['data']}, broadcast=True, room="UI_1")
+        settingschanged()
+        refresh_settings()
     elif(msg['cmd'] == 'setoutput'):
         koboldai_vars.genamt = int(msg['data'])
         emit('from_server', {'cmd': 'setlabeloutput', 'data': msg['data']}, broadcast=True, room="UI_1")
@@ -2922,8 +2950,8 @@ def get_message(msg):
         sendUSStatItems()
     elif(msg['cmd'] == 'samplers'):
         sampler_order = msg["data"]
-        sampler_order_min_length = 6
-        sampler_order_max_length = 7
+        sampler_order_min_length = 8
+        sampler_order_max_length = 9
         if(not isinstance(sampler_order, list)):
             raise ValueError(f"Sampler order must be a list, but got a {type(sampler_order)}")
         if(not (sampler_order_min_length <= len(sampler_order) <= sampler_order_max_length)):
@@ -3501,6 +3529,8 @@ def apiactionsubmit_tpumtjgenerate(txt, minimum, maximum):
         repetition_penalty=koboldai_vars.rep_pen,
         rpslope=koboldai_vars.rep_pen_slope,
         rprange=koboldai_vars.rep_pen_range,
+        eps_cutoff=koboldai_vars.eps_cutoff,
+        eta_cutoff=koboldai_vars.eta_cutoff,
         soft_embeddings=koboldai_vars.sp,
         soft_tokens=soft_tokens,
         sampler_order=koboldai_vars.sampler_order,
@@ -4148,6 +4178,8 @@ def refresh_settings():
     socketio.emit('from_server', {'cmd': 'updatereppen', 'data': koboldai_vars.rep_pen}, broadcast=True, room="UI_1")
     socketio.emit('from_server', {'cmd': 'updatereppenslope', 'data': koboldai_vars.rep_pen_slope}, broadcast=True, room="UI_1")
     socketio.emit('from_server', {'cmd': 'updatereppenrange', 'data': koboldai_vars.rep_pen_range}, broadcast=True, room="UI_1")
+    socketio.emit('from_server', {'cmd': 'updateeps_cutoff', 'data': koboldai_vars.eps_cutoff}, broadcast=True, room="UI_1")
+    socketio.emit('from_server', {'cmd': 'updateeta_cutoff', 'data': koboldai_vars.eta_cutoff}, broadcast=True, room="UI_1")
     socketio.emit('from_server', {'cmd': 'updateoutlen', 'data': koboldai_vars.genamt}, broadcast=True, room="UI_1")
     socketio.emit('from_server', {'cmd': 'updatetknmax', 'data': koboldai_vars.max_length}, broadcast=True, room="UI_1")
     socketio.emit('from_server', {'cmd': 'updatenumseq', 'data': koboldai_vars.numseqs}, broadcast=True, room="UI_1")
@@ -7146,7 +7178,7 @@ def UI_2_load_cookies():
 def UI_2_save_new_preset(data):
     preset = model_info()
     #Data to get from current settings
-    for item in ["genamt", "rep_pen", "rep_pen_range", "rep_pen_slope", "sampler_order", "temp", "tfs", "top_a", "top_k", "top_p", "typical"]:
+    for item in ["genamt", "rep_pen", "rep_pen_range", "rep_pen_slope", "sampler_order", "temp", "tfs", "top_a", "top_k", "top_p", "typical", "eps_cutoff", "eta_cutoff"]:
         preset[item] = getattr(koboldai_vars, item)
     #Data to get from UI
     for item in ['preset', 'description']:
@@ -8131,6 +8163,9 @@ class SamplerSettingsSchema(KoboldSchema):
     tfs: Optional[float] = fields.Float(validate=validate.Range(min=0, max=1), metadata={"description": "Tail free sampling value."})
     typical: Optional[float] = fields.Float(validate=validate.Range(min=0, max=1), metadata={"description": "Typical sampling value."})
     temperature: Optional[float] = fields.Float(validate=validate.Range(min=0, min_inclusive=False), metadata={"description": "Temperature value."})
+    eps_cutoff: Optional[float] = fields.Float(validate=validate.Range(min=0, max=1000.0), metadata={"description": "Epsilon sampling value."})
+    eta_cutoff: Optional[float] = fields.Float(validate=validate.Range(min=0), metadata={"description": "Eta sampling value."})
+
 
 def soft_prompt_validator(soft_prompt: str):
     if len(soft_prompt.strip()) == 0:
@@ -8181,7 +8216,7 @@ class Meta:
     disable_input_formatting: bool = fields.Boolean(load_default=True, metadata={"description": "When enabled, all input formatting options default to `false` instead of the value in the KoboldAI GUI"})
     frmtadsnsp: Optional[bool] = fields.Boolean(metadata={"description": "Input formatting option. When enabled, adds a leading space to your input if there is no trailing whitespace at the end of the previous action.\n\nIf `disable_input_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."})
     quiet: Optional[bool] = fields.Boolean(metadata={"description": "When enabled, generated output will not be displayed in the console."})
-    sampler_order: Optional[List[int]] = fields.List(fields.Integer(), validate=[validate.Length(min=6), permutation_validator], metadata={"description": "Sampler order to be used. If N is the length of this array, then N must be greater than or equal to 6 and the array must be a permutation of the first N non-negative integers."})
+    sampler_order: Optional[List[int]] = fields.List(fields.Integer(), validate=[validate.Length(min=8), permutation_validator], metadata={"description": "Sampler order to be used. If N is the length of this array, then N must be greater than or equal to 8 and the array must be a permutation of the first N non-negative integers."})
     sampler_seed: Optional[int] = fields.Integer(validate=validate.Range(min=0, max=2**64 - 1), metadata={"description": "RNG seed to use for sampling. If not specified, the global RNG will be used."})
     sampler_full_determinism: Optional[bool] = fields.Boolean(metadata={"description": "If enabled, the generated text will always be the same as long as you use the same RNG seed, input and settings. If disabled, only the *sequence* of generated texts that you get when repeatedly generating text will be the same given the same RNG seed, input and settings."})
     stop_sequence: Optional[List[str]] = fields.List(fields.String(), metadata={"description": "An array of string sequences where the API will stop generating further tokens. The returned text WILL contain the stop sequence."})
@@ -8299,7 +8334,7 @@ def _generate_text(body: GenerationInputSchema):
         torch.manual_seed(body.sampler_seed)
         koboldai_vars.rng_states[body.sampler_seed] = tpu_mtj_backend.get_rng_state() if koboldai_vars.use_colab_tpu else torch.get_rng_state()
     if hasattr(body, "sampler_order"):
-        if len(body.sampler_order) < 7:
+        if len(body.sampler_order) < 9:
             body.sampler_order = [6] + body.sampler_order
     # This maps each property of the settings to use when sending the generate request idempotently
     # to the object which typically contains its value
@@ -8317,6 +8352,8 @@ def _generate_text(body: GenerationInputSchema):
         "tfs": ("koboldai_vars", "tfs", None),
         "typical": ("koboldai_vars", "typical", None),
         "temperature": ("koboldai_vars", "temp", None),
+        "eps_cutoff": ("koboldai_vars", "eps_cutoff", None),
+        "eta_cutoff": ("koboldai_vars", "eta_cutoff", None),
         "frmtadsnsp": ("koboldai_vars", "frmtadsnsp", "input"),
         "frmttriminc": ("koboldai_vars", "frmttriminc", "output"),
         "frmtrmblln": ("koboldai_vars", "frmtrmblln", "output"),
@@ -10762,6 +10799,26 @@ class KoboldMeta:
         name = "temperature"
         example_yaml_value = "0.5"
 
+@config_endpoint_schema
+class EpsilonSamplingSettingSchema(KoboldSchema):
+    value = fields.Float(validate=validate.Range(min=0, max=1000), required=True)
+    class KoboldMeta:
+        route_name = "eps_cutoff"
+        obj = "koboldai_vars"
+        var_name = "eps_cutoff"
+        name = "Epsilon sampling"
+        example_yaml_value = "0.0"
+
+@config_endpoint_schema
+class EtaSamplingSettingSchema(KoboldSchema):
+    value = fields.Float(validate=validate.Range(min=0), required=True)
+    class KoboldMeta:
+        route_name = "eta_cutoff"
+        obj = "koboldai_vars"
+        var_name = "eta_cutoff"
+        name = "Eta sampling"
+        example_yaml_value = "0.0"
+
 @config_endpoint_schema
 class GensPerActionSettingSchema(KoboldSchema):
     value = fields.Integer(validate=validate.Range(min=0, max=5), required=True)
@@ -10870,7 +10927,7 @@ class KoboldMeta:
         obj = "koboldai_vars"
         var_name = "sampler_order"
         name = "sampler order"
-        example_yaml_value = "[6, 0, 1, 2, 3, 4, 5]"
+        example_yaml_value = "[6, 0, 1, 2, 3, 4, 5, 7, 8]"
 
 @config_endpoint_schema
 class SamplerFullDeterminismSettingSchema(KoboldSchema):
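Both the samplers handler and the sampler_order schema in the diff above enforce the same tightened rule: an order must now have at least 8 entries and be a permutation of the first N non-negative integers. A standalone sketch of that check, for readers tracing the validation logic (the helper name is illustrative, not part of this diff):

def validate_sampler_order(order):
    # N >= 8 and the list must be a permutation of 0..N-1, matching the
    # description in the sampler_order field of GenerationInputSchema above.
    if not isinstance(order, list):
        raise ValueError(f"Sampler order must be a list, but got a {type(order)}")
    n = len(order)
    if n < 8:
        raise ValueError(f"Sampler order must have at least 8 entries, got {n}")
    if sorted(order) != list(range(n)):
        raise ValueError("Sampler order must be a permutation of the first N non-negative integers")

validate_sampler_order([6, 0, 7, 1, 3, 8, 4, 2, 5])  # the new default order passes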
4 changes: 3 additions & 1 deletion api_example.py
@@ -23,7 +23,9 @@ def get_prompt(user_msg):
         "top_k": 0, # Keep the X most probable tokens
         "top_p": 0.9, # Top P sampling / Nucleus Sampling, https://arxiv.org/pdf/1904.09751.pdf
         "typical": 1.0, # Typical Sampling, https://arxiv.org/pdf/2202.00666.pdf
-        "sampler_order": [6,0,1,3,4,2,5], # Order to apply the samplers, our default in this script is already the optimal one. KoboldAI Lite contains an easy list of what the
+        "eps_cutoff": 0.0, # Discard tokens with low probability, from https://arxiv.org/pdf/2210.15191.pdf
+        "eta_cutoff": 0.0, # Entropy-adaptive epsilon, from the same paper as epsilon sampling
+        "sampler_order": [6,0,7,1,3,8,4,2,5], # Order to apply the samplers, our default in this script is already the optimal one. KoboldAI Lite contains an easy list of what the
         "stop_sequence": [f"{user}"], # When should the AI stop generating? In this example we stop when it tries to speak on behalf of the user.
         #"sampler_seed": 1337, # Use specific seed for text generation? This helps with consistency across tests.
         "singleline": "False", # Only return a response that fits on a single line, this can help with chatbots but also makes them less verbose
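As a usage illustration, a hedged sketch of a request that exercises the two new fields against a local instance (endpoint path, port, and response shape are assumptions based on the usual KoboldAI API layout, not part of this diff):

import requests

payload = {
    "prompt": "Once upon a time",
    "max_length": 80,
    "eps_cutoff": 3.0,   # per the slider tooltips, units of 1e-4, i.e. tokens below p ~ 3e-4 are discarded
    "eta_cutoff": 0.0,   # 0 leaves the adaptive variant disabled
    "sampler_order": [6, 0, 7, 1, 3, 8, 4, 2, 5],
}
r = requests.post("http://localhost:5000/api/v1/generate", json=payload)
print(r.json()["results"][0]["text"])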
2 changes: 2 additions & 0 deletions bridge.lua
@@ -890,6 +890,8 @@ return function(_python, _bridged)
 ---@field reppen number
 ---@field reppenslope number
 ---@field reppenrange number
+---@field eps_cutoff number
+---@field eta_cutoff number
 ---@field tknmax integer
 ---@field widepth integer
 ---@field useprompt boolean
62 changes: 62 additions & 0 deletions gensettings.py
@@ -176,6 +176,38 @@
         "name": "use_alt_rep_pen",
         "ui_level": 2
     },
+    {
+        "uitype": "slider",
+        "unit": "float",
+        "label": "Epsilon Sampling",
+        "id": "seteps_cutoff",
+        "min": 0.0,
+        "max": 9.0,
+        "step": 0.01,
+        "default": 0.0,
+        "tooltip": "Slider is in units of 1e-4. Discards tokens with probabilities under eps. (Set this value to 0 to disable its effect.)",
+        "menu_path": "Settings",
+        "sub_path": "Sampling",
+        "classname": "model",
+        "name": "eps_cutoff",
+        "ui_level": 1
+    },
+    {
+        "uitype": "slider",
+        "unit": "float",
+        "label": "Eta Sampling",
+        "id": "seteta_cutoff",
+        "min": 0.0,
+        "max": 20.0,
+        "step": 0.01,
+        "default": 0.0,
+        "tooltip": "Slider is in units of 1e-4. Eta sampling is a variant of epsilon sampling that adaptively estimates eps based on the entropy of the output. (Set this value to 0 to disable its effect.)",
+        "menu_path": "Settings",
+        "sub_path": "Sampling",
+        "classname": "model",
+        "name": "eta_cutoff",
+        "ui_level": 1
+    },
     {
         "uitype": "slider",
         "unit": "int",
@@ -1085,6 +1117,36 @@
         "classname": "model",
         "name": "tfs"
     },
+    {
+        "uitype": "slider",
+        "unit": "float",
+        "label": "Epsilon Sampling",
+        "id": "seteps_cutoff",
+        "min": 0.0,
+        "max": 9.0,
+        "step": 0.01,
+        "default": 0.0,
+        "tooltip": "Slider is in units of 1e-4. Discards tokens with probabilities under eps.",
+        "menu_path": "Settings",
+        "sub_path": "Sampling",
+        "classname": "model",
+        "name": "eps_cutoff",
+    },
+    {
+        "uitype": "slider",
+        "unit": "float",
+        "label": "Eta Sampling",
+        "id": "seteta_cutoff",
+        "min": 0.0,
+        "max": 20.0,
+        "step": 0.01,
+        "default": 0.0,
+        "tooltip": "Slider is in units of 1e-4. Eta sampling is a variant of epsilon sampling that adaptively estimates eps based on the entropy of the output.",
+        "menu_path": "Settings",
+        "sub_path": "Sampling",
+        "classname": "model",
+        "name": "eta_cutoff",
+    },
     {
         "uitype": "slider",
         "unit": "int",
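The tooltips above compress both samplers into a sentence each. As a reference, a minimal NumPy sketch of the two cutoffs (the eta threshold min(eta, sqrt(eta) * exp(-entropy)) follows the reference implementation in Hugging Face transformers; function names are illustrative, not code from this diff):

import numpy as np

def apply_epsilon_cutoff(probs, eps):
    # Epsilon sampling: discard every token whose probability is below eps.
    if eps <= 0.0:
        return probs
    keep = probs >= eps
    if not keep.any():  # never truncate the whole distribution
        keep[np.argmax(probs)] = True
    filtered = np.where(keep, probs, 0.0)
    return filtered / filtered.sum()

def apply_eta_cutoff(probs, eta):
    # Eta sampling: an entropy-adaptive epsilon. The threshold shrinks when
    # the distribution is high-entropy (many plausible continuations).
    if eta <= 0.0:
        return probs
    entropy = -np.sum(probs * np.log(probs + 1e-10))
    threshold = min(eta, np.sqrt(eta) * np.exp(-entropy))
    keep = probs >= threshold
    if not keep.any():
        keep[np.argmax(probs)] = True
    filtered = np.where(keep, probs, 0.0)
    return filtered / filtered.sum()

Since the sliders report values in units of 1e-4, a backend would presumably rescale (e.g. a slider value of 3.0 becomes eps = 3e-4) before filtering.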
19 changes: 14 additions & 5 deletions koboldai_settings.py
@@ -614,7 +614,7 @@ def from_json(self, data):
             start_time = time.time()
             if key in self.__dict__ and key not in self.no_save_variables:
                 if key == 'sampler_order':
-                    if(len(value) < 7):
+                    if(len(value) < 9):
                         value = [6] + value
                 elif key == 'autosave':
                     autosave = value
@@ -669,8 +669,9 @@ class model_settings(settings):
                          'welcome', 'welcome_default', 'simple_randomness', 'simple_creativity', 'simple_repitition',
                          'badwordsids', 'uid_presets', 'model', 'model_type', 'lazy_load', 'fp32_model', 'modeldim', 'horde_wait_time', 'horde_queue_position', 'horde_queue_size', 'newlinemode', 'tqdm_progress', 'tqdm_rem_time', '_tqdm']
     settings_name = "model"
-    default_settings = {"rep_pen" : 1.1, "rep_pen_slope": 1.0, "rep_pen_range": 2048, "temp": 0.5, "top_p": 0.9, "top_k": 0, "top_a": 0.0, "tfs": 1.0, "typical": 1.0,
-                        "sampler_order": [6,0,1,2,3,4,5]}
+    default_settings = {"rep_pen" : 1.1, "rep_pen_slope": 1.0, "rep_pen_range": 2048,
+                        "temp": 0.5, "top_p": 0.9, "top_k": 0, "top_a": 0.0, "tfs": 1.0, "typical": 1.0, "eps_cutoff": 0.0, "eta_cutoff": 0.0,
+                        "sampler_order": [6,0,7,1,3,8,4,2,5]}
     def __init__(self, socketio, koboldai_vars):
         self.enable_whitelist = False
         self._socketio = socketio
@@ -721,12 +722,14 @@ def reset_for_model_load(self):
         self.top_a = 0.0 # Default generator top-a
         self.tfs = 1.0 # Default generator tfs (tail-free sampling)
         self.typical = 1.0 # Default generator typical sampling threshold
+        self.eps_cutoff = 0.0 # Default generator epsilon cutoff
+        self.eta_cutoff = 0.0 # Default generator eta cutoff
         self.numseqs = 1 # Number of sequences to ask the generator to create
         self.generated_tkns = 0 # If using a backend that supports Lua generation modifiers, how many tokens have already been generated, otherwise 0
         self.badwordsids = []
         self.fp32_model = False # Whether or not the most recently loaded HF model was in fp32 format
         self.modeldim = -1 # Embedding dimension of your model (e.g. it's 4096 for GPT-J-6B and 2560 for GPT-Neo-2.7B)
-        self.sampler_order = [6, 0, 1, 2, 3, 4, 5]
+        self.sampler_order = [6, 0, 1, 2, 3, 4, 5, 7, 8]
         self.newlinemode = "n"
         self.presets = [] # Holder for presets
         self.selected_preset = ""
@@ -758,6 +761,8 @@ def __setattr__(self, name, value):
             self.top_a = 0.0
             self.tfs = 1.0
             self.typical = 1.0
+            self.eps_cutoff = 0.0
+            self.eta_cutoff = 0.0
             self.rep_pen_range = 1024
             self.rep_pen_slope = 0.7
 
@@ -2763,14 +2768,18 @@ def wi_images(self) -> str:
         "rep_pen": 1.1,
         "rep_pen_range": 1024,
         "rep_pen_slope": 0.7,
+        "eps_cutoff": 0.0,
+        "eta_cutoff": 0.0,
         "sampler_order": [
             6,
             0,
             1,
             2,
             3,
             4,
-            5
+            5,
+            7,
+            8
         ]
     }
 badwordsids_default = [[6880], [50256], [42496], [4613], [17414], [22039], [16410], [27], [29], [38430], [37922], [15913], [24618], [28725], [58], [47175], [36937], [26700], [12878], [16471], [37981], [5218], [29795], [13412], [45160], [3693], [49778], [4211], [20598], [36475], [33409], [44167], [32406], [29847], [29342], [42669], [685], [25787], [7359], [3784], [5320], [33994], [33490], [34516], [43734], [17635], [24293], [9959], [23785], [21737], [28401], [18161], [26358], [32509], [1279], [38155], [18189], [26894], [6927], [14610], [23834], [11037], [14631], [26933], [46904], [22330], [25915], [47934], [38214], [1875], [14692], [41832], [13163], [25970], [29565], [44926], [19841], [37250], [49029], [9609], [44438], [16791], [17816], [30109], [41888], [47527], [42924], [23984], [49074], [33717], [31161], [49082], [30138], [31175], [12240], [14804], [7131], [26076], [33250], [3556], [38381], [36338], [32756], [46581], [17912], [49146]] # Tokenized array of badwords used to prevent AI artifacting
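The from_json change above is a save-compatibility shim: sampler orders shorter than the new nine-entry format get sampler id 6 (which appears to be the repetition-penalty sampler) prepended on load, mirroring the earlier < 7 check. An isolated sketch of the behavior as written:

def upgrade_sampler_order(value):
    # Mirrors the shim in from_json: anything shorter than nine entries
    # gets id 6 prepended so it runs first.
    if len(value) < 9:
        value = [6] + value
    return value

print(upgrade_sampler_order([0, 1, 2, 3, 4, 5]))  # -> [6, 0, 1, 2, 3, 4, 5]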
2 changes: 2 additions & 0 deletions modeling/inference_model.py
@@ -134,6 +134,8 @@ def __init__(self, **overrides) -> None:
             "rep_pen",
             "rep_pen_slope",
             "rep_pen_range",
+            "eps_cutoff",
+            "eta_cutoff",
             "sampler_order",
         ]:
             setattr(