Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Beta38 #38

Merged
merged 37 commits into from
Apr 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
ea4bcc6
beta1
Hoshinonyaruko Jan 21, 2024
5002a28
beta2
Hoshinonyaruko Jan 21, 2024
969841e
beta3
Hoshinonyaruko Jan 21, 2024
a2989c2
beta4
Hoshinonyaruko Jan 21, 2024
611cd8b
beta5
Hoshinonyaruko Jan 21, 2024
dbad42b
beta6
Hoshinonyaruko Jan 21, 2024
4f8895e
beta7
Hoshinonyaruko Jan 21, 2024
30a9cab
beta8
Hoshinonyaruko Jan 21, 2024
461d879
beta9
Hoshinonyaruko Jan 21, 2024
c90c2ee
beta10
Hoshinonyaruko Jan 21, 2024
5227c32
beta11
Hoshinonyaruko Jan 21, 2024
8e14e56
beta12
Hoshinonyaruko Jan 21, 2024
6adfb7f
beta13
Hoshinonyaruko Jan 21, 2024
b04f6c0
beta14
Hoshinonyaruko Jan 21, 2024
649e740
beta15
Hoshinonyaruko Jan 21, 2024
d55cf29
beta16
Hoshinonyaruko Jan 21, 2024
1cc351e
beta16
Hoshinonyaruko Jan 21, 2024
f1a373e
beta19
Hoshinonyaruko Jan 21, 2024
cbf2fe1
beta20
Hoshinonyaruko Jan 21, 2024
3485bfc
beta21
Hoshinonyaruko Jan 21, 2024
3e3bb1a
beta22
Hoshinonyaruko Jan 21, 2024
2e4f3fa
beta23
Hoshinonyaruko Mar 29, 2024
f74075d
beta24
Hoshinonyaruko Mar 29, 2024
f3913cf
beta25
Hoshinonyaruko Mar 29, 2024
4cfb51a
beta27
Hoshinonyaruko Mar 30, 2024
d9b26c0
beta28
Hoshinonyaruko Mar 31, 2024
19da709
beta29
Hoshinonyaruko Mar 31, 2024
e06dcbf
merge
Hoshinonyaruko Mar 31, 2024
c76923e
beta30
Hoshinonyaruko Mar 31, 2024
9ae21da
beta31
Hoshinonyaruko Apr 1, 2024
4f333bc
merge
Hoshinonyaruko Apr 1, 2024
5acba8f
beta33
Hoshinonyaruko Apr 1, 2024
c637fcd
beta34
Hoshinonyaruko Apr 1, 2024
4e85b37
beta35
Hoshinonyaruko Apr 1, 2024
970948e
beta36
Hoshinonyaruko Apr 1, 2024
14271c3
beta37
Hoshinonyaruko Apr 1, 2024
6154d1a
beta38
Hoshinonyaruko Apr 1, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
config.yml
*.sqlite
sensitive_words.txt
sensitive_words_in.txt
sensitive_words_out.txt
white.txt

# Go specific
Expand Down
120 changes: 96 additions & 24 deletions acnode/acnode.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"log"
"os"
"sort"
"strings"
"unicode/utf16"

Expand All @@ -15,16 +16,24 @@ import (

// 定义包级别的全局变量
var ac *AhoCorasick
var acout *AhoCorasick
var wac *AhoCorasick

// init函数用于初始化操作
func init() {
ac = NewAhoCorasick()
acout = NewAhoCorasick()
wac = NewAhoCorasick()

// 载入敏感词库
if err := loadWordsIntoAC(ac, "sensitive_words.txt"); err != nil {
log.Fatalf("初始化敏感词库失败:%v", err)
// 载入敏感词库 入
if err := loadWordsIntoAC(ac, "sensitive_words_in.txt"); err != nil {
log.Fatalf("初始化敏感入词库失败:%v", err)
// 注意,log.Fatalf会调用os.Exit(1)终止程序,因此后面的return不是必须的
}

// 载入敏感词库 出
if err := loadWordsIntoAC(acout, "sensitive_words_out.txt"); err != nil {
log.Fatalf("初始化敏感出词库失败:%v", err)
// 注意,log.Fatalf会调用os.Exit(1)终止程序,因此后面的return不是必须的
}

Expand All @@ -49,6 +58,13 @@ type AhoCorasick struct {
root *ACNode
}

// Replacement describes one substitution to perform on a text. Positions are
// indices into the text's rune slice, not byte offsets.
type Replacement struct {
	// Start and End are the first and last positions being replaced; End is
	// inclusive.
	Start, End int
	// Text is the string written in place of the replaced range.
	Text string
}

func NewAhoCorasick() *AhoCorasick {
return &AhoCorasick{
root: &ACNode{children: make(map[rune]*ACNode)},
Expand Down Expand Up @@ -100,13 +116,6 @@ func (ac *AhoCorasick) FilterWithWhitelist(text string, whiteListedPositions []P
runes := []rune(text)
changes := false // 标记是否有替换发生

// 在函数内定义Replacement结构体来记录替换信息
type Replacement struct {
Start int // 替换起始位置
End int // 替换结束位置
Text string // 替换文本
}

// 创建一个替换列表,用于记录所有替换操作
var replacements []Replacement

Expand Down Expand Up @@ -144,23 +153,60 @@ func (ac *AhoCorasick) FilterWithWhitelist(text string, whiteListedPositions []P
}
}

// 使用applyReplacements函数替换原有的替换逻辑
if changes {
// 对文本进行实际替换
var result []rune
lastIndex := 0
for _, r := range replacements {
// 添加未被替换的部分
result = append(result, runes[lastIndex:r.Start]...)
// 添加替换文本
result = append(result, []rune(r.Text)...)
lastIndex = r.End + 1
newText := applyReplacements(text, replacements)
return newText
}
return text
}

// 假设Replacement定义如前所述

// mergeOverlappingReplacements returns the replacements ordered by Start,
// with every run of overlapping ranges collapsed into a single entry.
//
// Start and End are treated as inclusive positions, so a range overlaps the
// previous one when it starts at or before the previous End. Ranges that
// merely touch (next.Start == prev.End+1) are kept separate. When an
// overlapping range reaches past the current merged range, the merged entry
// takes that range's End and Text; a range fully contained in the current one
// is dropped. For equal Start values the longer range is considered first.
//
// An empty input is returned as is. Otherwise a new slice is returned and the
// caller's slice is neither reordered nor modified.
func mergeOverlappingReplacements(replacements []Replacement) []Replacement {
	if len(replacements) == 0 {
		return replacements
	}

	// Sort a private copy: sort.Slice works in place and would otherwise
	// reorder the slice the caller still holds.
	sorted := make([]Replacement, len(replacements))
	copy(sorted, replacements)
	sort.Slice(sorted, func(i, j int) bool {
		if sorted[i].Start == sorted[j].Start {
			return sorted[i].End > sorted[j].End // same Start: longer range first
		}
		return sorted[i].Start < sorted[j].Start
	})

	merged := make([]Replacement, 0, len(sorted))
	merged = append(merged, sorted[0])
	for _, current := range sorted[1:] {
		last := &merged[len(merged)-1]
		if current.Start > last.End {
			// Disjoint from everything merged so far: start a new entry.
			merged = append(merged, current)
			continue
		}
		if current.End > last.End {
			// Overlaps and extends further: grow the merged range and let the
			// later replacement's text win.
			last.End = current.End
			last.Text = current.Text
		}
	}
	return merged
}

return text
// applyReplacements builds a new string from text in which each replacement's
// [Start, End] range (rune indices, End inclusive) is swapped for its Text.
//
// The replacements are first passed through mergeOverlappingReplacements and
// then applied in the order that function returns them. Text outside any
// replaced range is copied through unchanged.
func applyReplacements(text string, replacements []Replacement) string {
	src := []rune(text)
	merged := mergeOverlappingReplacements(replacements)

	var out []rune
	cursor := 0 // index of the first source rune not yet copied or replaced
	for i := range merged {
		rep := &merged[i]
		// Copy the untouched stretch before this replacement, then its text.
		out = append(out, src[cursor:rep.Start]...)
		out = append(out, []rune(rep.Text)...)
		cursor = rep.End + 1
	}
	// Whatever follows the last replacement is kept as is.
	return string(append(out, src[cursor:]...))
}

type Position struct {
Expand Down Expand Up @@ -279,7 +325,7 @@ func convertToUnicodeEscape(str string) string {
}

// 改写后的函数,接受word参数,并返回处理结果
func CheckWord(word string) string {
func CheckWordIN(word string) string {
if word == "" {
log.Println("错误请求:缺少 'word' 参数")
return "错误:缺少 'word' 参数"
Expand All @@ -303,3 +349,29 @@ func CheckWord(word string) string {

return result
}

// 改写后的函数,接受word参数,并返回处理结果
func CheckWordOUT(word string) string {
if word == "" {
log.Println("错误请求:缺少 'word' 参数")
return "错误:缺少 'word' 参数"
}

if len([]rune(word)) > 5000 {
if strings.Contains(word, "[CQ:image,file=base64://") {
// 当word包含特定字符串时原样返回
fmtf.Printf("原样返回的文本:%s", word)
return word
}
log.Printf("错误请求:字符数超过最大限制(5000字符)。内容:%s", word)
return "错误:字符数超过最大限制(5000字符)"
}

// 使用全局的wac进行白名单匹配
whiteListedPositions := wac.MatchPositions(word)

// 使用全局的ac进行过滤,并结合白名单
result := acout.FilterWithWhitelist(word, whiteListedPositions)

return result
}
39 changes: 0 additions & 39 deletions applogic/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package applogic
import (
"database/sql"

"github.com/hoshinonyaruko/gensokyo-llm/config"
"github.com/hoshinonyaruko/gensokyo-llm/fmtf"
"github.com/hoshinonyaruko/gensokyo-llm/hunyuan"
"github.com/hoshinonyaruko/gensokyo-llm/structs"
Expand Down Expand Up @@ -118,44 +117,6 @@ func (app *App) updateUserContext(userID int64, parentMessageID string) error {
return nil
}

// truncateHistoryHunYuan trims history so that the combined size of prompt and
// the remaining messages fits the limit from config.GetMaxTokensHunyuan.
//
// Size is measured with len() on prompt and on each message's Text, not with
// a real tokenizer. If everything already fits, history is returned untouched.
// Otherwise only messages whose Role is "user" are kept, and the oldest of
// those are dropped one by one until the size fits or none remain.
func truncateHistoryHunYuan(history []structs.Message, prompt string) []structs.Message {
	limit := config.GetMaxTokensHunyuan()

	// size reports len(prompt) plus the len of every message text in msgs.
	size := func(msgs []structs.Message) int {
		total := len(prompt)
		for i := range msgs {
			total += len(msgs[i].Text)
		}
		return total
	}

	if size(history) <= limit {
		return history
	}

	// Step 1: drop every non-user message.
	userOnly := make([]structs.Message, 0, len(history))
	for _, m := range history {
		if m.Role != "user" {
			continue
		}
		userOnly = append(userOnly, m)
	}

	// Step 2: drop from the front until the size fits. The loop body never
	// runs when the user-only history already fits.
	remaining := size(userOnly)
	for remaining > limit && len(userOnly) > 0 {
		remaining -= len(userOnly[0].Text)
		userOnly = userOnly[1:]
	}

	return userOnly
}

func (app *App) getHistory(conversationID, parentMessageID string) ([]structs.Message, error) {
var history []structs.Message

Expand Down
54 changes: 35 additions & 19 deletions applogic/chatgpt.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,16 +98,22 @@ func (app *App) ChatHandlerChatgpt(w http.ResponseWriter, r *http.Request) {

// 获取历史信息
if msg.ParentMessageID != "" {
history, err = app.getHistory(msg.ConversationID, msg.ParentMessageID)
userhistory, err := app.getHistory(msg.ConversationID, msg.ParentMessageID)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}

// 截断历史信息
history = truncateHistoryGpt(history, msg.Text)
userhistory = truncateHistoryGpt(userhistory, msg.Text)

// 注意追加的顺序,确保问题在系统提示词之后
// 使用...操作符来展开userhistory切片并追加到history切片
history = append(history, userhistory...)
}

fmtf.Printf("CLOSE-AI上下文history:%v\n", history)

// 构建请求到ChatGPT API
model := config.GetGptModel()
apiURL := config.GetGptApiPath()
Expand Down Expand Up @@ -398,28 +404,38 @@ func truncateHistoryGpt(history []structs.Message, prompt string) []structs.Mess
return history
}

// 第一步:移除所有助手回复
truncatedHistory := []structs.Message{}
for _, msg := range history {
if msg.Role == "user" {
truncatedHistory = append(truncatedHistory, msg)
}
}
// 第一步:从开始逐个移除消息,直到满足令牌数量限制
for tokenCount > MAX_TOKENS && len(history) > 0 {
tokenCount -= len(history[0].Text)
history = history[1:]

tokenCount = len(prompt)
for _, msg := range truncatedHistory {
tokenCount += len(msg.Text)
// 确保移除后,历史记录仍然以user消息结尾
if len(history) > 0 && history[0].Role == "assistant" {
tokenCount -= len(history[0].Text)
history = history[1:]
}
}

if tokenCount <= MAX_TOKENS {
return truncatedHistory
// 第二步:检查并移除包含空文本的QA对
for i := 0; i < len(history)-1; i++ { // 使用len(history)-1是因为我们要检查成对的消息
q := history[i]
a := history[i+1]

// 检查Q和A是否成对,且A的角色应为assistant,Q的角色为user,避免删除非QA对的消息
if q.Role == "user" && a.Role == "assistant" && (len(q.Text) == 0 || len(a.Text) == 0) {
fmtf.Println("closeai-找到了空的对话: ", q, a)
// 移除这对QA
history = append(history[:i], history[i+2:]...)
i-- // 因为删除了元素,调整索引以正确检查下一个元素
}
}

// 第二步:从开始逐个移除消息,直到满足令牌数量限制
for tokenCount > MAX_TOKENS && len(truncatedHistory) > 0 {
tokenCount -= len(truncatedHistory[0].Text)
truncatedHistory = truncatedHistory[1:]
// 确保以user结尾,如果不是则尝试移除直到满足条件
if len(history) > 0 && history[len(history)-1].Role != "user" {
for len(history) > 0 && history[len(history)-1].Role != "user" {
history = history[:len(history)-1]
}
}

return truncatedHistory
return history
}
Loading
Loading