Skip to content

Commit

Permalink
feat(sanitizer): improve text truncation with better space handling
Browse files Browse the repository at this point in the history
  • Loading branch information
fguillot committed Feb 7, 2025
1 parent 7bdf133 commit 9279073
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 2 deletions.
5 changes: 3 additions & 2 deletions internal/reader/sanitizer/truncate.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ func TruncateHTML(input string, max int) string {
text := StripTags(input)
text = strings.ReplaceAll(text, "\n", " ")
text = strings.ReplaceAll(text, "\t", " ")
text = strings.ReplaceAll(text, " ", " ")
text = strings.TrimSpace(text)

// Collapse multiple spaces into a single space
text = strings.Join(strings.Fields(text), " ")

// Convert to runes to be safe with unicode
runes := []rune(text)
Expand Down
50 changes: 50 additions & 0 deletions internal/reader/sanitizer/truncate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,53 @@ func TestTruncateHTMLWithMultilineTextLowerThanLimit(t *testing.T) {
t.Errorf(`Wrong output: %q != %q`, expected, output)
}
}

// TestTruncateHTMLWithMultipleSpaces checks that TruncateHTML normalizes
// whitespace before applying the length limit: runs of spaces, tabs and
// newlines must come out as single spaces, HTML tags must be removed, and
// the limit must be counted in runes rather than bytes.
func TestTruncateHTMLWithMultipleSpaces(t *testing.T) {
	tests := []struct {
		name     string
		input    string
		maxLen   int
		expected string
	}{
		{
			name: "multiple spaces",
			// Consecutive spaces are required here; with single spaces the
			// input already equals the expected output and the case would
			// pass even if nothing collapsed whitespace.
			input:    "hello    world   test",
			maxLen:   20,
			expected: "hello world test",
		},
		{
			name:     "tabs and newlines",
			input:    "hello\t\tworld\n\ntest",
			maxLen:   20,
			expected: "hello world test",
		},
		{
			name: "truncation with unicode",
			// The limit is 11 and the expected text keeps all 11 runes of
			// "hello world" before the ellipsis.
			input:    "hello world 你好",
			maxLen:   11,
			expected: "hello world…",
		},
		{
			name:     "html stripping",
			input:    "<p>hello <b>world</b> test</p>",
			maxLen:   20,
			expected: "hello world test",
		},
		{
			name:     "no truncation needed",
			input:    "hello world",
			maxLen:   20,
			expected: "hello world",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := TruncateHTML(tt.input, tt.maxLen)
			if result != tt.expected {
				t.Errorf("TruncateHTML(%q, %d) = %q, want %q",
					tt.input, tt.maxLen, result, tt.expected)
			}
		})
	}
}

0 comments on commit 9279073

Please sign in to comment.