Add epsilon and eta sampling #475

Merged 1 commit on Oct 14, 2023

69 changes: 63 additions & 6 deletions aiserver.py
@@ -1192,6 +1192,12 @@ def loadmodelsettings():
     if("rep_pen_range" in js):
         koboldai_vars.rep_pen_range = js["rep_pen_range"]
         koboldai_vars.default_preset['rep_pen_range'] = js["rep_pen_range"]
+    if("eps_cutoff" in js):
+        koboldai_vars.eps_cutoff = js["eps_cutoff"]
+        koboldai_vars.default_preset['eps_cutoff'] = js["eps_cutoff"]
+    if("eta_cutoff" in js):
+        koboldai_vars.eta_cutoff = js["eta_cutoff"]
+        koboldai_vars.default_preset['eta_cutoff'] = js["eta_cutoff"]
     if("adventure" in js):
         koboldai_vars.adventure = js["adventure"]
     if("chatmode" in js):
@@ -1275,6 +1281,10 @@ def processsettings(js):
         koboldai_vars.rep_pen_slope = js["rep_pen_slope"]
     if("rep_pen_range" in js):
         koboldai_vars.rep_pen_range = js["rep_pen_range"]
+    if("eps_cutoff" in js):
+        koboldai_vars.eps_cutoff = js["eps_cutoff"]
+    if("eta_cutoff" in js):
+        koboldai_vars.eta_cutoff = js["eta_cutoff"]
     if("genamt" in js):
         koboldai_vars.genamt = js["genamt"]
     if("max_length" in js):
@@ -2251,6 +2261,8 @@ def lua_has_setting(setting):
         "setreppen",
         "setreppenslope",
         "setreppenrange",
+        "seteps_cutoff",
+        "seteta_cutoff",
         "settknmax",
         "setwidepth",
         "setuseprompt",
@@ -2271,6 +2283,8 @@ def lua_has_setting(setting):
         "reppen",
         "reppenslope",
         "reppenrange",
+        "eps_cutoff",
+        "eta_cutoff",
         "tknmax",
         "widepth",
         "useprompt",
@@ -2309,6 +2323,8 @@ def lua_get_setting(setting):
     if(setting in ("setreppen", "reppen")): return koboldai_vars.rep_pen
     if(setting in ("setreppenslope", "reppenslope")): return koboldai_vars.rep_pen_slope
     if(setting in ("setreppenrange", "reppenrange")): return koboldai_vars.rep_pen_range
+    if(setting in ("seteps_cutoff", "eps_cutoff")): return koboldai_vars.eps_cutoff
+    if(setting in ("seteta_cutoff", "eta_cutoff")): return koboldai_vars.eta_cutoff
     if(setting in ("settknmax", "tknmax")): return koboldai_vars.max_length
     if(setting == "anotedepth"): return koboldai_vars.andepth
     if(setting in ("setwidepth", "widepth")): return koboldai_vars.widepth
@@ -2347,6 +2363,8 @@ def lua_set_setting(setting, v):
     if(setting in ("setreppen", "reppen")): koboldai_vars.rep_pen = v
     if(setting in ("setreppenslope", "reppenslope")): koboldai_vars.rep_pen_slope = v
     if(setting in ("setreppenrange", "reppenrange")): koboldai_vars.rep_pen_range = v
+    if(setting in ("seteps_cutoff", "eps_cutoff")): koboldai_vars.eps_cutoff = v
+    if(setting in ("seteta_cutoff", "eta_cutoff")): koboldai_vars.eta_cutoff = v
     if(setting in ("settknmax", "tknmax")): koboldai_vars.max_length = v; return True
     if(setting == "anotedepth"): koboldai_vars.andepth = v; return True
     if(setting in ("setwidepth", "widepth")): koboldai_vars.widepth = v; return True
@@ -2772,6 +2790,16 @@ def get_message(msg):
         emit('from_server', {'cmd': 'setlabelreppenrange', 'data': msg['data']}, broadcast=True, room="UI_1")
         settingschanged()
         refresh_settings()
+    elif(msg['cmd'] == 'seteps_cutoff'):
+        koboldai_vars.eps_cutoff = float(msg['data'])
+        emit('from_server', {'cmd': 'setlabeleps_cutoff', 'data': msg['data']}, broadcast=True, room="UI_1")
+        settingschanged()
+        refresh_settings()
+    elif(msg['cmd'] == 'seteta_cutoff'):
+        koboldai_vars.eta_cutoff = float(msg['data'])
+        emit('from_server', {'cmd': 'setlabeleta_cutoff', 'data': msg['data']}, broadcast=True, room="UI_1")
+        settingschanged()
+        refresh_settings()
     elif(msg['cmd'] == 'setoutput'):
         koboldai_vars.genamt = int(msg['data'])
         emit('from_server', {'cmd': 'setlabeloutput', 'data': msg['data']}, broadcast=True, room="UI_1")
@@ -2922,8 +2950,8 @@ def get_message(msg):
         sendUSStatItems()
     elif(msg['cmd'] == 'samplers'):
         sampler_order = msg["data"]
-        sampler_order_min_length = 6
-        sampler_order_max_length = 7
+        sampler_order_min_length = 8
+        sampler_order_max_length = 9
         if(not isinstance(sampler_order, list)):
             raise ValueError(f"Sampler order must be a list, but got a {type(sampler_order)}")
         if(not (sampler_order_min_length <= len(sampler_order) <= sampler_order_max_length)):
@@ -3501,6 +3529,8 @@ def apiactionsubmit_tpumtjgenerate(txt, minimum, maximum):
         repetition_penalty=koboldai_vars.rep_pen,
         rpslope=koboldai_vars.rep_pen_slope,
         rprange=koboldai_vars.rep_pen_range,
+        eps_cutoff=koboldai_vars.eps_cutoff,
+        eta_cutoff=koboldai_vars.eta_cutoff,
         soft_embeddings=koboldai_vars.sp,
         soft_tokens=soft_tokens,
         sampler_order=koboldai_vars.sampler_order,
@@ -4148,6 +4178,8 @@ def refresh_settings():
     socketio.emit('from_server', {'cmd': 'updatereppen', 'data': koboldai_vars.rep_pen}, broadcast=True, room="UI_1")
     socketio.emit('from_server', {'cmd': 'updatereppenslope', 'data': koboldai_vars.rep_pen_slope}, broadcast=True, room="UI_1")
     socketio.emit('from_server', {'cmd': 'updatereppenrange', 'data': koboldai_vars.rep_pen_range}, broadcast=True, room="UI_1")
+    socketio.emit('from_server', {'cmd': 'updateeps_cutoff', 'data': koboldai_vars.eps_cutoff}, broadcast=True, room="UI_1")
+    socketio.emit('from_server', {'cmd': 'updateeta_cutoff', 'data': koboldai_vars.eta_cutoff}, broadcast=True, room="UI_1")
     socketio.emit('from_server', {'cmd': 'updateoutlen', 'data': koboldai_vars.genamt}, broadcast=True, room="UI_1")
     socketio.emit('from_server', {'cmd': 'updatetknmax', 'data': koboldai_vars.max_length}, broadcast=True, room="UI_1")
     socketio.emit('from_server', {'cmd': 'updatenumseq', 'data': koboldai_vars.numseqs}, broadcast=True, room="UI_1")
@@ -7146,7 +7178,7 @@ def UI_2_load_cookies():
 def UI_2_save_new_preset(data):
     preset = model_info()
     #Data to get from current settings
-    for item in ["genamt", "rep_pen", "rep_pen_range", "rep_pen_slope", "sampler_order", "temp", "tfs", "top_a", "top_k", "top_p", "typical"]:
+    for item in ["genamt", "rep_pen", "rep_pen_range", "rep_pen_slope", "sampler_order", "temp", "tfs", "top_a", "top_k", "top_p", "typical", "eps_cutoff", "eta_cutoff"]:
         preset[item] = getattr(koboldai_vars, item)
     #Data to get from UI
     for item in ['preset', 'description']:
@@ -8131,6 +8163,9 @@ class SamplerSettingsSchema(KoboldSchema):
     tfs: Optional[float] = fields.Float(validate=validate.Range(min=0, max=1), metadata={"description": "Tail free sampling value."})
     typical: Optional[float] = fields.Float(validate=validate.Range(min=0, max=1), metadata={"description": "Typical sampling value."})
     temperature: Optional[float] = fields.Float(validate=validate.Range(min=0, min_inclusive=False), metadata={"description": "Temperature value."})
+    eps_cutoff: Optional[float] = fields.Float(validate=validate.Range(min=0, max=1000.0), metadata={"description": "Epsilon sampling value."})
+    eta_cutoff: Optional[float] = fields.Float(validate=validate.Range(min=0), metadata={"description": "Eta sampling value."})
+
 
 def soft_prompt_validator(soft_prompt: str):
     if len(soft_prompt.strip()) == 0:
@@ -8181,7 +8216,7 @@ class Meta:
     disable_input_formatting: bool = fields.Boolean(load_default=True, metadata={"description": "When enabled, all input formatting options default to `false` instead of the value in the KoboldAI GUI"})
     frmtadsnsp: Optional[bool] = fields.Boolean(metadata={"description": "Input formatting option. When enabled, adds a leading space to your input if there is no trailing whitespace at the end of the previous action.\n\nIf `disable_input_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."})
     quiet: Optional[bool] = fields.Boolean(metadata={"description": "When enabled, generated output will not be displayed in the console."})
-    sampler_order: Optional[List[int]] = fields.List(fields.Integer(), validate=[validate.Length(min=6), permutation_validator], metadata={"description": "Sampler order to be used. If N is the length of this array, then N must be greater than or equal to 6 and the array must be a permutation of the first N non-negative integers."})
+    sampler_order: Optional[List[int]] = fields.List(fields.Integer(), validate=[validate.Length(min=8), permutation_validator], metadata={"description": "Sampler order to be used. If N is the length of this array, then N must be greater than or equal to 8 and the array must be a permutation of the first N non-negative integers."})
     sampler_seed: Optional[int] = fields.Integer(validate=validate.Range(min=0, max=2**64 - 1), metadata={"description": "RNG seed to use for sampling. If not specified, the global RNG will be used."})
     sampler_full_determinism: Optional[bool] = fields.Boolean(metadata={"description": "If enabled, the generated text will always be the same as long as you use the same RNG seed, input and settings. If disabled, only the *sequence* of generated texts that you get when repeatedly generating text will be the same given the same RNG seed, input and settings."})
     stop_sequence: Optional[List[str]] = fields.List(fields.String(), metadata={"description": "An array of string sequences where the API will stop generating further tokens. The returned text WILL contain the stop sequence."})
@@ -8299,7 +8334,7 @@ def _generate_text(body: GenerationInputSchema):
         torch.manual_seed(body.sampler_seed)
         koboldai_vars.rng_states[body.sampler_seed] = tpu_mtj_backend.get_rng_state() if koboldai_vars.use_colab_tpu else torch.get_rng_state()
     if hasattr(body, "sampler_order"):
-        if len(body.sampler_order) < 7:
+        if len(body.sampler_order) < 9:
             body.sampler_order = [6] + body.sampler_order
     # This maps each property of the settings to use when sending the generate request idempotently
     # to the object which typically contains its value
@@ -8317,6 +8352,8 @@ def _generate_text(body: GenerationInputSchema):
         "tfs": ("koboldai_vars", "tfs", None),
         "typical": ("koboldai_vars", "typical", None),
         "temperature": ("koboldai_vars", "temp", None),
+        "eps_cutoff": ("koboldai_vars", "eps_cutoff", None),
+        "eta_cutoff": ("koboldai_vars", "eta_cutoff", None),
         "frmtadsnsp": ("koboldai_vars", "frmtadsnsp", "input"),
         "frmttriminc": ("koboldai_vars", "frmttriminc", "output"),
         "frmtrmblln": ("koboldai_vars", "frmtrmblln", "output"),
@@ -10762,6 +10799,26 @@ class KoboldMeta:
         name = "temperature"
         example_yaml_value = "0.5"
 
+@config_endpoint_schema
+class EpsilonSamplingSettingSchema(KoboldSchema):
+    value = fields.Float(validate=validate.Range(min=0, max=1000), required=True)
+    class KoboldMeta:
+        route_name = "eps_cutoff"
+        obj = "koboldai_vars"
+        var_name = "eps_cutoff"
+        name = "Epsilon sampling"
+        example_yaml_value = "0.0"
+
+@config_endpoint_schema
+class EtaSamplingSettingSchema(KoboldSchema):
+    value = fields.Float(validate=validate.Range(min=0), required=True)
+    class KoboldMeta:
+        route_name = "eta_cutoff"
+        obj = "koboldai_vars"
+        var_name = "eta_cutoff"
+        name = "Eta sampling"
+        example_yaml_value = "0.0"
+
 @config_endpoint_schema
 class GensPerActionSettingSchema(KoboldSchema):
     value = fields.Integer(validate=validate.Range(min=0, max=5), required=True)
@@ -10870,7 +10927,7 @@ class KoboldMeta:
         obj = "koboldai_vars"
         var_name = "sampler_order"
         name = "sampler order"
-        example_yaml_value = "[6, 0, 1, 2, 3, 4, 5]"
+        example_yaml_value = "[6, 0, 1, 2, 3, 4, 5, 7, 8]"
 
 @config_endpoint_schema
 class SamplerFullDeterminismSettingSchema(KoboldSchema):
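Both the samplers handler and the sampler_order schema in the diff above enforce the same tightened rule: an order must now have at least 8 entries and be a permutation of the first N non-negative integers. A standalone sketch of that check, for readers tracing the validation logic (the helper name is illustrative, not part of this diff):

def validate_sampler_order(order):
    # N >= 8 and the list must be a permutation of 0..N-1, matching the
    # description in the sampler_order field of GenerationInputSchema above.
    if not isinstance(order, list):
        raise ValueError(f"Sampler order must be a list, but got a {type(order)}")
    n = len(order)
    if n < 8:
        raise ValueError(f"Sampler order must have at least 8 entries, got {n}")
    if sorted(order) != list(range(n)):
        raise ValueError("Sampler order must be a permutation of the first N non-negative integers")

validate_sampler_order([6, 0, 7, 1, 3, 8, 4, 2, 5])  # the new default order passes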
4 changes: 3 additions & 1 deletion api_example.py
@@ -23,7 +23,9 @@ def get_prompt(user_msg):
         "top_k": 0, # Keep the X most probable tokens
         "top_p": 0.9, # Top P sampling / Nucleus Sampling, https://arxiv.org/pdf/1904.09751.pdf
         "typical": 1.0, # Typical Sampling, https://arxiv.org/pdf/2202.00666.pdf
-        "sampler_order": [6,0,1,3,4,2,5], # Order to apply the samplers, our default in this script is already the optimal one. KoboldAI Lite contains an easy list of what the
+        "eps_cutoff": 0.0, # Discard tokens with low probability, from https://arxiv.org/pdf/2210.15191.pdf
+        "eta_cutoff": 0.0, # Entropy-adaptive epsilon, from the same paper as epsilon sampling
+        "sampler_order": [6,0,7,1,3,8,4,2,5], # Order to apply the samplers, our default in this script is already the optimal one. KoboldAI Lite contains an easy list of what the
         "stop_sequence": [f"{user}"], # When should the AI stop generating? In this example we stop when it tries to speak on behalf of the user.
         #"sampler_seed": 1337, # Use specific seed for text generation? This helps with consistency across tests.
         "singleline": "False", # Only return a response that fits on a single line, this can help with chatbots but also makes them less verbose
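As a usage illustration, a hedged sketch of a request that exercises the two new fields against a local instance (endpoint path, port, and response shape are assumptions based on the usual KoboldAI API layout, not part of this diff):

import requests

payload = {
    "prompt": "Once upon a time",
    "max_length": 80,
    "eps_cutoff": 3.0,   # per the slider tooltips, units of 1e-4, i.e. tokens below p ~ 3e-4 are discarded
    "eta_cutoff": 0.0,   # 0 leaves the adaptive variant disabled
    "sampler_order": [6, 0, 7, 1, 3, 8, 4, 2, 5],
}
r = requests.post("http://localhost:5000/api/v1/generate", json=payload)
print(r.json()["results"][0]["text"])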
2 changes: 2 additions & 0 deletions bridge.lua
@@ -890,6 +890,8 @@ return function(_python, _bridged)
 ---@field reppen number
 ---@field reppenslope number
 ---@field reppenrange number
+---@field eps_cutoff number
+---@field eta_cutoff number
 ---@field tknmax integer
 ---@field widepth integer
 ---@field useprompt boolean
62 changes: 62 additions & 0 deletions gensettings.py
@@ -176,6 +176,38 @@
         "name": "use_alt_rep_pen",
         "ui_level": 2
     },
+    {
+        "uitype": "slider",
+        "unit": "float",
+        "label": "Epsilon Sampling",
+        "id": "seteps_cutoff",
+        "min": 0.0,
+        "max": 9.0,
+        "step": 0.01,
+        "default": 0.0,
+        "tooltip": "Slider is in units of 1e-4. Discards tokens with probabilities under eps. (Set this value to 0 to disable its effect.)",
+        "menu_path": "Settings",
+        "sub_path": "Sampling",
+        "classname": "model",
+        "name": "eps_cutoff",
+        "ui_level": 1
+    },
+    {
+        "uitype": "slider",
+        "unit": "float",
+        "label": "Eta Sampling",
+        "id": "seteta_cutoff",
+        "min": 0.0,
+        "max": 20.0,
+        "step": 0.01,
+        "default": 0.0,
+        "tooltip": "Slider is in units of 1e-4. Eta sampling is a variant of epsilon sampling that adaptively estimates eps based on the entropy of the output. (Set this value to 0 to disable its effect.)",
+        "menu_path": "Settings",
+        "sub_path": "Sampling",
+        "classname": "model",
+        "name": "eta_cutoff",
+        "ui_level": 1
+    },
     {
         "uitype": "slider",
         "unit": "int",
@@ -1085,6 +1117,36 @@
         "classname": "model",
         "name": "tfs"
     },
+    {
+        "uitype": "slider",
+        "unit": "float",
+        "label": "Epsilon Sampling",
+        "id": "seteps_cutoff",
+        "min": 0.0,
+        "max": 9.0,
+        "step": 0.01,
+        "default": 0.0,
+        "tooltip": "Slider is in units of 1e-4. Discards tokens with probabilities under eps.",
+        "menu_path": "Settings",
+        "sub_path": "Sampling",
+        "classname": "model",
+        "name": "eps_cutoff",
+    },
+    {
+        "uitype": "slider",
+        "unit": "float",
+        "label": "Eta Sampling",
+        "id": "seteta_cutoff",
+        "min": 0.0,
+        "max": 20.0,
+        "step": 0.01,
+        "default": 0.0,
+        "tooltip": "Slider is in units of 1e-4. Eta sampling is a variant of epsilon sampling that adaptively estimates eps based on the entropy of the output.",
+        "menu_path": "Settings",
+        "sub_path": "Sampling",
+        "classname": "model",
+        "name": "eta_cutoff",
+    },
     {
         "uitype": "slider",
         "unit": "int",
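The tooltips above compress both samplers into a sentence each. As a reference, a minimal NumPy sketch of the two cutoffs (the eta threshold min(eta, sqrt(eta) * exp(-entropy)) follows the reference implementation in Hugging Face transformers; function names are illustrative, not code from this diff):

import numpy as np

def apply_epsilon_cutoff(probs, eps):
    # Epsilon sampling: discard every token whose probability is below eps.
    if eps <= 0.0:
        return probs
    keep = probs >= eps
    if not keep.any():  # never truncate the whole distribution
        keep[np.argmax(probs)] = True
    filtered = np.where(keep, probs, 0.0)
    return filtered / filtered.sum()

def apply_eta_cutoff(probs, eta):
    # Eta sampling: an entropy-adaptive epsilon. The threshold shrinks when
    # the distribution is high-entropy (many plausible continuations).
    if eta <= 0.0:
        return probs
    entropy = -np.sum(probs * np.log(probs + 1e-10))
    threshold = min(eta, np.sqrt(eta) * np.exp(-entropy))
    keep = probs >= threshold
    if not keep.any():
        keep[np.argmax(probs)] = True
    filtered = np.where(keep, probs, 0.0)
    return filtered / filtered.sum()

Since the sliders report values in units of 1e-4, a backend would presumably rescale (e.g. a slider value of 3.0 becomes eps = 3e-4) before filtering.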
19 changes: 14 additions & 5 deletions koboldai_settings.py
@@ -614,7 +614,7 @@ def from_json(self, data):
             start_time = time.time()
             if key in self.__dict__ and key not in self.no_save_variables:
                 if key == 'sampler_order':
-                    if(len(value) < 7):
+                    if(len(value) < 9):
                         value = [6] + value
                 elif key == 'autosave':
                     autosave = value
@@ -669,8 +669,9 @@ class model_settings(settings):
                          'welcome', 'welcome_default', 'simple_randomness', 'simple_creativity', 'simple_repitition',
                          'badwordsids', 'uid_presets', 'model', 'model_type', 'lazy_load', 'fp32_model', 'modeldim', 'horde_wait_time', 'horde_queue_position', 'horde_queue_size', 'newlinemode', 'tqdm_progress', 'tqdm_rem_time', '_tqdm']
     settings_name = "model"
-    default_settings = {"rep_pen" : 1.1, "rep_pen_slope": 1.0, "rep_pen_range": 2048, "temp": 0.5, "top_p": 0.9, "top_k": 0, "top_a": 0.0, "tfs": 1.0, "typical": 1.0,
-                        "sampler_order": [6,0,1,2,3,4,5]}
+    default_settings = {"rep_pen" : 1.1, "rep_pen_slope": 1.0, "rep_pen_range": 2048,
+                        "temp": 0.5, "top_p": 0.9, "top_k": 0, "top_a": 0.0, "tfs": 1.0, "typical": 1.0, "eps_cutoff": 0.0, "eta_cutoff": 0.0,
+                        "sampler_order": [6,0,7,1,3,8,4,2,5]}
     def __init__(self, socketio, koboldai_vars):
         self.enable_whitelist = False
         self._socketio = socketio
@@ -721,12 +722,14 @@ def reset_for_model_load(self):
         self.top_a = 0.0 # Default generator top-a
         self.tfs = 1.0 # Default generator tfs (tail-free sampling)
         self.typical = 1.0 # Default generator typical sampling threshold
+        self.eps_cutoff = 0.0 # Default generator epsilon cutoff
+        self.eta_cutoff = 0.0 # Default generator eta cutoff
         self.numseqs = 1 # Number of sequences to ask the generator to create
         self.generated_tkns = 0 # If using a backend that supports Lua generation modifiers, how many tokens have already been generated, otherwise 0
         self.badwordsids = []
         self.fp32_model = False # Whether or not the most recently loaded HF model was in fp32 format
         self.modeldim = -1 # Embedding dimension of your model (e.g. it's 4096 for GPT-J-6B and 2560 for GPT-Neo-2.7B)
-        self.sampler_order = [6, 0, 1, 2, 3, 4, 5]
+        self.sampler_order = [6, 0, 1, 2, 3, 4, 5, 7, 8]
         self.newlinemode = "n"
         self.presets = [] # Holder for presets
         self.selected_preset = ""
@@ -758,6 +761,8 @@ def __setattr__(self, name, value):
             self.top_a = 0.0
             self.tfs = 1.0
             self.typical = 1.0
+            self.eps_cutoff = 0.0
+            self.eta_cutoff = 0.0
             self.rep_pen_range = 1024
             self.rep_pen_slope = 0.7
 
@@ -2763,14 +2768,18 @@ def wi_images(self) -> str:
         "rep_pen": 1.1,
         "rep_pen_range": 1024,
         "rep_pen_slope": 0.7,
+        "eps_cutoff": 0.0,
+        "eta_cutoff": 0.0,
         "sampler_order": [
             6,
             0,
             1,
             2,
             3,
             4,
-            5
+            5,
+            7,
+            8
         ]
     }
 badwordsids_default = [[6880], [50256], [42496], [4613], [17414], [22039], [16410], [27], [29], [38430], [37922], [15913], [24618], [28725], [58], [47175], [36937], [26700], [12878], [16471], [37981], [5218], [29795], [13412], [45160], [3693], [49778], [4211], [20598], [36475], [33409], [44167], [32406], [29847], [29342], [42669], [685], [25787], [7359], [3784], [5320], [33994], [33490], [34516], [43734], [17635], [24293], [9959], [23785], [21737], [28401], [18161], [26358], [32509], [1279], [38155], [18189], [26894], [6927], [14610], [23834], [11037], [14631], [26933], [46904], [22330], [25915], [47934], [38214], [1875], [14692], [41832], [13163], [25970], [29565], [44926], [19841], [37250], [49029], [9609], [44438], [16791], [17816], [30109], [41888], [47527], [42924], [23984], [49074], [33717], [31161], [49082], [30138], [31175], [12240], [14804], [7131], [26076], [33250], [3556], [38381], [36338], [32756], [46581], [17912], [49146]] # Tokenized array of badwords used to prevent AI artifacting
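The from_json change above is a save-compatibility shim: sampler orders shorter than the new nine-entry format get sampler id 6 (which appears to be the repetition-penalty sampler) prepended on load, mirroring the earlier < 7 check. An isolated sketch of the behavior as written:

def upgrade_sampler_order(value):
    # Mirrors the shim in from_json: anything shorter than nine entries
    # gets id 6 prepended so it runs first.
    if len(value) < 9:
        value = [6] + value
    return value

print(upgrade_sampler_order([0, 1, 2, 3, 4, 5]))  # -> [6, 0, 1, 2, 3, 4, 5]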
2 changes: 2 additions & 0 deletions modeling/inference_model.py
@@ -134,6 +134,8 @@ def __init__(self, **overrides) -> None:
             "rep_pen",
             "rep_pen_slope",
             "rep_pen_range",
+            "eps_cutoff",
+            "eta_cutoff",
             "sampler_order",
         ]:
             setattr(