diff --git a/dict/dictionary.go b/dict/dictionary.go index 4c06b0c..0c78e75 100644 --- a/dict/dictionary.go +++ b/dict/dictionary.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "log" + "strings" "time" "github.com/junegunn/fzf/src/util" @@ -152,41 +153,42 @@ func (e *Entry) WriteLine() []byte { return bs } -// ParseInput \n -// "你\t好" > "你", "好", "" -// "你 好" > "你", "好", "" -// "你 好" > "你", "好", "" -// "你\t 好" > "你", "好", "" -// "你 好\t 1" > "你", "好", "1" -// "你好 nau 1" > "你好", "nau", "1" -// "nau 你好 1" > "你好", "nau", "1" -// " nau 你好 1 " > "你好", "nau", "1" -func ParseInput(raw string) [3]string { - pair := [3]string{} - for j, l, i := 0, 0, 0; i <= len(raw); i++ { - if i == len(raw) || raw[i] == '\t' || raw[i] == ' ' { - if l == i { - l = i + 1 - continue +// Parse input string to a pair of strings +// 0: 表(汉字) 1: 码(字母) 2: 权重 +// 支持乱序输入,如 "你好 nau 1" 或 "nau 1 你好" +func ParseInput(raw string) (pair [3]string) { + pair = [3]string{} + // split by '\t' or ' ' + splits := strings.Fields(raw) + for i := 0; i < len(splits); i++ { + item := strings.TrimSpace(splits[i]) + if len(item) == 0 { + continue + } + if isNumber(item) { + pair[2] = item + continue + } + if isAscii(item) { + pair[1] = item + } else { + space := " " + if pair[0] == "" { + space = "" } - pair[j] = raw[l:i] - l = i + 1 - j++ + pair[0] = pair[0] + space + item } } - notAsciiIndex := 0 - for i, p := range pair { - if !isAscii(p) { - notAsciiIndex = i - break + return +} + +func isNumber(str string) bool { + for _, r := range str { + if r < '0' || r > '9' { + return false } } - if notAsciiIndex != 0 { - t := pair[notAsciiIndex] - pair[notAsciiIndex] = pair[0] - pair[0] = t - } - return pair + return true } func isAscii(str string) bool { @@ -198,15 +200,24 @@ func isAscii(str string) bool { return true } +// Parse bytes as a couple of strings([]byte) separated by '\t' +// e.g. "你好 nau" > ["你好", "nau"] +// not like ParseInput, this function simply split by '\t' func ParsePair(raw []byte) [][]byte { pair := make([][]byte, 0) for i, j := 0, 0; i < len(raw); i++ { if raw[i] == '\t' { - pair = append(pair, bytes.TrimSpace(raw[j:i])) + item := bytes.TrimSpace(raw[j:i]) + if len(item) > 0 { + pair = append(pair, item) + } j = i + 1 } if i == len(raw)-1 && j <= i { - pair = append(pair, bytes.TrimSpace(raw[j:])) + item := bytes.TrimSpace(raw[j:]) + if len(item) > 0 { + pair = append(pair, item) + } } } return pair diff --git a/dict/dictionary_test.go b/dict/dictionary_test.go index 6fb8996..8e1348c 100644 --- a/dict/dictionary_test.go +++ b/dict/dictionary_test.go @@ -4,31 +4,31 @@ import ( "context" "fmt" "reflect" + "sort" + "strings" "testing" ) -func TestDictionary_Search(t *testing.T) { +func Test_Dictionary_Search(t *testing.T) { type args struct { key []rune fes []*FileEntries } fes1 := &FileEntries{ Entries: []*Entry{ - NewEntry([]byte("helle world"), "", 0, 0), - NewEntry([]byte("hi, did eve alive?"), "", 0, 0), - NewEntry([]byte("你好"), "", 0, 0), + NewEntry([]byte("helle world"), "", 1, 0), + NewEntry([]byte("hi, did eve alive?"), "", 2, 0), + NewEntry([]byte("你好"), "", 3, 0), }, } - fes2 := LoadItems("../rime/xkjd/xkjd6.dict.yaml") - fmt.Println(len(fes2)) tests := []struct { name string args args + want []*Entry }{ - {"3", args{[]rune("wor"), []*FileEntries{fes1}}}, - {"1", args{[]rune("hel"), []*FileEntries{fes1}}}, - {"2", args{[]rune("你"), []*FileEntries{fes1}}}, - {"load", args{[]rune("hmxa"), fes2}}, + {"case1", args{[]rune("wor"), []*FileEntries{fes1}}, []*Entry{fes1.Entries[0]}}, + {"case2", args{[]rune("hel"), []*FileEntries{fes1}}, []*Entry{fes1.Entries[0], fes1.Entries[1]}}, + {"case3", args{[]rune("你"), []*FileEntries{fes1}}, []*Entry{fes1.Entries[2]}}, } for _, tt := range tests { t.Run(tt.name, func(_ *testing.T) { @@ -36,15 +36,28 @@ func TestDictionary_Search(t *testing.T) { ctx := context.Background() ch := make(chan []*MatchResult) fmt.Println("searching for", string(tt.args.key)) - go dict.Search(tt.args.key, ch, ctx) + go func() { + dict.Search(tt.args.key, ch, ctx) + close(ch) + }() for ret := range ch { - fmt.Println(ret) + fmt.Println("ret", ret) + entries := make([]*Entry, 0) + for _, r := range ret { + entries = append(entries, r.Entry) + } + sort.Slice(entries, func(i, j int) bool { + return entries[i].seek < entries[j].seek + }) + if !reflect.DeepEqual(entries, tt.want) { + t.Errorf("Search() = %v, want %v", entries, tt.want) + } } }) } } -func TestParseInput(t *testing.T) { +func Test_ParseInput(t *testing.T) { type args struct { raw string } @@ -53,15 +66,19 @@ func TestParseInput(t *testing.T) { args args want [3]string }{ - {"1", args{"你\t好"}, [3]string{"你", "好", ""}}, - {"1", args{"你 好"}, [3]string{"你", "好", ""}}, - {"1", args{"你 好"}, [3]string{"你", "好", ""}}, - {"1", args{"你\t 好"}, [3]string{"你", "好", ""}}, - {"1", args{"你 好\t 1"}, [3]string{"你", "好", "1"}}, - {"1", args{"你好 nau 1"}, [3]string{"你好", "nau", "1"}}, - {"1", args{"nau 你好 1"}, [3]string{"你好", "nau", "1"}}, - {"1", args{" nau 你好 1 "}, [3]string{"你好", "nau", "1"}}, + {"case1", args{"你\t好"}, [3]string{"你 好", "", ""}}, + {"case2", args{"你 好"}, [3]string{"你 好", "", ""}}, + {"case3", args{"你 好"}, [3]string{"你 好", "", ""}}, + {"case4", args{"你\t 好"}, [3]string{"你 好", "", ""}}, + {"case5", args{"你 好\t 1"}, [3]string{"你 好", "", "1"}}, + {"case6", args{"你好 nau 1"}, [3]string{"你好", "nau", "1"}}, + {"case7", args{"nau 你好 1"}, [3]string{"你好", "nau", "1"}}, + {"case8", args{" nau 你好 1 "}, [3]string{"你好", "nau", "1"}}, + {"case9", args{"nau hi你好ya 1 "}, [3]string{"hi你好ya", "nau", "1"}}, + {"case10", args{"nau hi 你好 ya 1i "}, [3]string{"你好", "1i", ""}}, } + fields := strings.Fields("你\t好") + fmt.Println(fields, len(fields)) for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { if got := ParseInput(tt.args.raw); !reflect.DeepEqual(got, tt.want) { @@ -71,55 +88,59 @@ func TestParseInput(t *testing.T) { } } -func TestParsePair(t *testing.T) { +func Test_ParsePair(t *testing.T) { type args struct { - raw []byte + raw string } tests := []struct { name string args args - want [][]byte + want []string }{ { name: "case1", - args: args{ - []byte("你好 nau"), - }, - want: [][]byte{ - []byte("你好"), - []byte("nau"), - }, + args: args{"你好 nau"}, + want: []string{"你好", "nau"}, }, { name: "case2", - args: args{ - []byte("你好\t\n"), - }, - want: [][]byte{ - []byte("你好"), - }, + args: args{"你好\t\n"}, + want: []string{"你好"}, }, { name: "case3", - args: args{ - []byte("你好 nau"), - }, - want: [][]byte{ - []byte("你好 nau"), - }, + args: args{"你好 nau"}, + want: []string{"你好 nau"}, }, { name: "case4", - args: args{ - []byte(" "), - }, - want: [][]byte{}, + args: args{" "}, + want: []string{}, + }, + { + name: "case5", + args: args{"你 好 nau 1"}, + want: []string{"你 好 nau 1"}, + }, + { + name: "case6", + args: args{"你 好\tnau\t1"}, + want: []string{"你 好", "nau", "1"}, + }, + { + name: "case7", + args: args{"你 好\t \tnau \t1"}, + want: []string{"你 好", "nau", "1"}, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := ParsePair(tt.args.raw); !reflect.DeepEqual(got, tt.want) { - t.Errorf("ParsePair() = %v, want %v", got, tt.want) + want := make([][]byte, len(tt.want)) + for i, s := range tt.want { + want[i] = []byte(s) + } + if got := ParsePair([]byte(tt.args.raw)); !reflect.DeepEqual(got, want) { + t.Errorf("ParsePair() = %v, want %v", got, want) } }) } diff --git a/dict/loader_test.go b/dict/loader_test.go index 5ce3b76..f235cd9 100644 --- a/dict/loader_test.go +++ b/dict/loader_test.go @@ -7,15 +7,18 @@ import ( "time" ) -func Test_loadItems(t *testing.T) { +func Test_LoadItems(t *testing.T) { + filename := mockFile() + defer os.RemoveAll("./tmp") type args struct { path string } tests := []struct { name string args args + want int }{ - {"1", args{"../rime/xkjd/xkjd6.dict.yaml"}}, + {"1", args{filename}, 19}, } for _, tt := range tests { t.Run(tt.name, func(_ *testing.T) { @@ -29,40 +32,98 @@ func Test_loadItems(t *testing.T) { entries = append(entries, fe.Entries...) } } - //for _, entry := range list { - // fmt.Print(entry) - //} fmt.Println("count >>", len(entries)) + if len(entries) != tt.want { + t.Errorf("Load Item Count = %v, want %v", len(entries), tt.want) + } fmt.Println("======================================================") - duration2 := time.Since(start) fmt.Println("load duration >>", duration1) - fmt.Println("print duration >>", duration2-duration1) }) } } -func Test_loadFile(t *testing.T) { - type args struct { - path string - order int - } - tests := []struct { - name string - args args - }{ - {"1", args{"../.rime/xkjd/xkjd6.dict.yaml", 0}}, +func mockFile() string { + // create ./tmp directory + err := os.MkdirAll("./tmp", os.ModePerm) + if err != nil { + fmt.Println("mkdir error, ", err) + panic(err) } - for _, tt := range tests { - t.Run(tt.name, func(_ *testing.T) { - f, err := os.OpenFile(tt.args.path, os.O_RDONLY, 0666) - if err != nil { - fmt.Println("file open error, ", err) - } - stat, err := f.Stat() - if err != nil { - fmt.Println("file stat error, ", err) - } - fmt.Println(stat) - }) + content := ` +# 键道6 扩展词库控制 +--- +name: xkjd6 +version: "Q1" +sort: original +use_preset_vocabulary: false +import_tables: +# 扩展:单字 + - rime.danzi +# 扩展:词组 + - rime.cizu +# 扩展:符号 + - rime.fuhao +` + filename := createFile("./tmp/rime.dict.yaml", content) + content = ` +--- +name: xkjd6.danzi +version: "Q1" +sort: original +... +不 b +宾 bb +滨 bba + ` + createFile("./tmp/rime.danzi.dict.yaml", content) + content = ` +--- +name: xkjd6.cizu +version: "Q1" +sort: original +import_tables: +# 扩展:单字 + - rime.cizu2 +... +并不比 bbb +彬彬 bbbb +斌斌 bbbbo + ` + createFile("./tmp/rime.cizu.dict.yaml", content) + content = ` +① oyk +② oxj +③ osf +④ osk +⑤ owj +⑥ olq +⑦ oqk +⑧ obs +⑨ ojq +⑩ oek + ` + createFile("./tmp/rime.fuhao.dict.yaml", content) + content = ` +--- +name: xkjd6.whatever +version: "Q1" +sort: original +... +造作 zzzl +早做 zzzlo +早早 zzzz +` + createFile("./tmp/rime.cizu2.dict.yaml", content) + return filename +} + +func createFile(name string, content string) string { + file, err := os.Create(name) + if err != nil { + fmt.Println("create temp file error, ", err) + panic(err) } + defer file.Close() + file.WriteString(content) + return file.Name() } diff --git a/dict/matcher.go b/dict/matcher.go index c738edc..4ac576b 100644 --- a/dict/matcher.go +++ b/dict/matcher.go @@ -83,7 +83,7 @@ func (m *CacheMatcher) Search(key []rune, list []*Entry, resultChan chan<- []*Ma if result.Score > 0 { matched = append(matched, &MatchResult{entry, result}) } - if idx%chunkSize == 0 || idx == listLen-1 { + if (idx%chunkSize == 0 && idx != 0) || idx == listLen-1 { m2 := matched[lastIdx:] if len(m2) > 0 { resultChan <- m2 diff --git a/dict/output.go b/dict/output.go index 3827704..d652263 100644 --- a/dict/output.go +++ b/dict/output.go @@ -53,13 +53,12 @@ func tryFatalf(err error, format string, args ...interface{}) { log.Fatalf(format, args...) } } + func outputFile(rawBs []byte, path string, entries []*Entry) { - //log.Printf("rawBs now len %d\n", len(rawBs)) + // log.Printf("rawBs now len %d\n", len(rawBs)) file, err := os.OpenFile(path, os.O_RDWR, 0666) tryFatalf(err, "open File failed, Err:%v", err) - defer func() { - _ = file.Close() - }() + defer file.Close() bs := make([]byte, len(rawBs)) copy(bs, rawBs) willAddEntries := make([]*Entry, 0) diff --git a/dict/output_test.go b/dict/output_test.go index 6535392..723e9fa 100644 --- a/dict/output_test.go +++ b/dict/output_test.go @@ -2,10 +2,12 @@ package dict import ( "bytes" + "os" + "strings" "testing" ) -func Test_writeLine(t *testing.T) { +func Test_Entry_WriteLine(t *testing.T) { tests := []struct { name string want []byte @@ -18,8 +20,8 @@ func Test_writeLine(t *testing.T) { }, { name: "2", - entry: *NewEntryAdd([]byte("测试\tc"), ""), - want: []byte("测试\tc"), + entry: *NewEntryAdd([]byte("测试\tc\t1"), ""), + want: []byte("测试\tc\t1"), }, } for _, tt := range tests { @@ -31,32 +33,58 @@ func Test_writeLine(t *testing.T) { } } -func Test_outputFile(t *testing.T) { +func Test_output(t *testing.T) { + filename := mockFile() + defer os.RemoveAll("./tmp") + fes := LoadItems(filename) + content := ` +--- +name: xkjd6.whatever +version: "Q1" +sort: original +... +早早 zzzzmod +早早 zzzz +测试 ceek +` type args struct { - fe *FileEntries - entries []*Entry + fe *FileEntries } tests := []struct { - name string args args + name string + want string }{ { - name: "1", + name: "case1", args: func() args { - path := "../rime/xkjd/xkjd6.user.dict.yaml" - fes := LoadItems(path) - fe := fes[0] - entries := fe.Entries[:] - entries[0].Delete() - entries[1].ReRaw(append(entries[1].WriteLine(), []byte{'m', 'o', 'd'}...)) - entries = append(entries, NewEntryAdd([]byte("测试\tceek"), path)) - return args{fe, entries} + var fe *FileEntries + for _, f := range fes { + if strings.Contains(f.FilePath, "rime.cizu2.dict.yaml") { + fe = f + } + } + if fe == nil { + panic("file not found: rime.cizu2.dict.yaml") + } + fe.Entries[0].Delete() + fe.Entries[1].ReRaw(append(fe.Entries[2].WriteLine(), []byte{'m', 'o', 'd'}...)) + fe.Entries = append(fe.Entries, NewEntryAdd([]byte("测试\tceek"), fe.FilePath)) + return args{fe} }(), + want: content, }, } for _, tt := range tests { t.Run(tt.name, func(_ *testing.T) { - outputFile(tt.args.fe.RawBs, tt.args.fe.FilePath, tt.args.entries) + output([]*FileEntries{tt.args.fe}) + c, err := os.ReadFile("./tmp/rime.cizu2.dict.yaml") + if err != nil { + panic(err) + } + if string(c) != tt.want { + t.Errorf("output() = %v, want %v", string(c), tt.want) + } }) } }