From 7f898a18fff1175edc7d0b22aaee39508f72f94d Mon Sep 17 00:00:00 2001 From: tazlin Date: Fri, 17 Jan 2025 09:56:36 -0500 Subject: [PATCH 1/2] fix: token suspicion adjustment Once upon a time, before batching and other optimizations, these were the speeds we considered unreasonable but new paradigms, backends and breakthroughs have made these numbers increasingly inaccurate or irrelevant. While I do think there has to be some sort of longer-term solution (such as for the problem detailed in https://github.com/Haidra-Org/AI-Horde/issues/463), there have been virtually *only* false positives, and the few true positives boiled down to innocent misconfigurations. Further, it appears that certain terms of worker-reported failures can artificially inflate token count, which may be its own issue. For the time being, I am advocating that the number is increased to 100t/s, as recommended by henky, and that we respond to possible abuse of this relaxation with other, more complete and sound, measures. --- horde/classes/kobold/processing_generation.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/horde/classes/kobold/processing_generation.py b/horde/classes/kobold/processing_generation.py index a28f8be9..e8ae7ba8 100644 --- a/horde/classes/kobold/processing_generation.py +++ b/horde/classes/kobold/processing_generation.py @@ -111,13 +111,22 @@ def record(self, things_per_sec, kudos): return param_multiplier = model_reference.get_text_model_multiplier(self.model) unreasonable_speed = hv.suspicion_thresholds["text"] + + # max_speed_per_multiplier = { + # 70: 12, + # 40: 22, + # 20: 35, + # 13: 50, + # 7: 70, + # } + + # Once upon a time, before batching and other optimizations, these were the speeds we considered unreasonable + # but new paradigms, backends and breakthroughs have made these numbers increasingly inaccurate or irrelevant. 
+ max_speed_per_multiplier = { - 70: 12, - 40: 22, - 20: 35, - 13: 50, - 7: 70, + 1: 100, } + for params_count in max_speed_per_multiplier: if param_multiplier >= params_count: unreasonable_speed = max_speed_per_multiplier[params_count] From f6a79156c1fa6c65a0e63245d8a2384b5c917335 Mon Sep 17 00:00:00 2001 From: tazlin Date: Fri, 17 Jan 2025 10:57:06 -0500 Subject: [PATCH 2/2] fix: add two gradations to the text token/s thresholds --- horde/classes/kobold/processing_generation.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/horde/classes/kobold/processing_generation.py b/horde/classes/kobold/processing_generation.py index e8ae7ba8..1742041c 100644 --- a/horde/classes/kobold/processing_generation.py +++ b/horde/classes/kobold/processing_generation.py @@ -124,7 +124,9 @@ def record(self, things_per_sec, kudos): # but new paradigms, backends and breakthroughs have made these numbers increasingly inaccurate or irrelevant. max_speed_per_multiplier = { - 1: 100, + 70: 30, + 14: 100, + 8: 150, } for params_count in max_speed_per_multiplier: