Skip to content

Commit

Permalink
chore: Add @ spam detection
Browse files: browse the repository at this point in the history
  • Loading branch information
snorremd committed Dec 29, 2024
1 parent 5ee826e commit cad43db
Showing 1 changed file with 18 additions and 9 deletions.
27 changes: 18 additions & 9 deletions firehose/firehose.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -139,27 +139,36 @@ func containsSpamContent(text string) bool {
}
}

// Count hashtags
// Count hashtags and mentions
hashtagCount := strings.Count(text, "#")
mentionCount := strings.Count(text, "@")

// If more than 5 hashtags, consider it spam
if hashtagCount > 5 {
log.Infof("Skipping spam post with many hashtags: %s", text)
return true
}

// Check for repeated hashtags (common spam pattern)
if strings.Count(text, "##") > 0 {
log.Infof("Skipping spam post with repeated hashtags: %s", text)
// If more than 5 mentions, consider it spam
if mentionCount > 5 {
log.Infof("Skipping spam post with many mentions: %s", text)
return true
}

// Check for repeated hashtags or mentions (common spam pattern)
if strings.Count(text, "##") > 0 || strings.Count(text, "@@") > 0 {
log.Infof("Skipping spam post with repeated hashtags/mentions: %s", text)
return true
}

// Check for hashtag ratio
// Check for hashtag and mention ratios
words := strings.Fields(text)
if len(words) > 0 {
hashtagRatio := float64(hashtagCount) / float64(len(words))
// If more than 40% of words are hashtags, consider it spam
if hashtagRatio > 0.4 {
log.Infof("Skipping spam post with high hashtag ratio: %s", text)
// Calculate combined ratio of hashtags and mentions
symbolRatio := float64(hashtagCount+mentionCount) / float64(len(words))
// If more than 50% of words are hashtags or mentions combined, consider it spam
if symbolRatio > 0.5 {
log.Infof("Skipping spam post with high hashtag/mention ratio: %s", text)
return true
}
}
Expand Down

0 comments on commit cad43db

Please sign in to comment.