From db5869b5dfd6e8c964ff749f6e6e252eac0e7a1d Mon Sep 17 00:00:00 2001 From: Christoph Thiede Date: Wed, 19 Jun 2024 18:10:52 +0200 Subject: [PATCH] filters: accelerate text search --- .../TalkTextSearcher.class/instance/filter..st | 12 ++++++++++++ .../TalkTextSearcher.class/instance/partTerms.st | 15 +++++++++------ .../TalkTextSearcher.class/methodProperties.json | 3 ++- 3 files changed, 23 insertions(+), 7 deletions(-) create mode 100644 packages/SqueakInboxTalk.package/TalkTextSearcher.class/instance/filter..st diff --git a/packages/SqueakInboxTalk.package/TalkTextSearcher.class/instance/filter..st b/packages/SqueakInboxTalk.package/TalkTextSearcher.class/instance/filter..st new file mode 100644 index 0000000..3e72d17 --- /dev/null +++ b/packages/SqueakInboxTalk.package/TalkTextSearcher.class/instance/filter..st @@ -0,0 +1,12 @@ +filtering +filter: conversations + "Overridden for performance." + + | partSearchers | + self isActive ifFalse: [^ conversations]. + + partSearchers := self partSearchers. + + ^ conversations select: [:conversation | + partSearchers allSatisfy: [:searcher | + (searcher matchesIn: conversation) talkIsEmpty not]] \ No newline at end of file diff --git a/packages/SqueakInboxTalk.package/TalkTextSearcher.class/instance/partTerms.st b/packages/SqueakInboxTalk.package/TalkTextSearcher.class/instance/partTerms.st index 53ef897..554cd3e 100644 --- a/packages/SqueakInboxTalk.package/TalkTextSearcher.class/instance/partTerms.st +++ b/packages/SqueakInboxTalk.package/TalkTextSearcher.class/instance/partTerms.st @@ -2,7 +2,7 @@ private partTerms "Split the current term into part terms that form a complex term together." - | patternFull patternMulti patternSingle patternTerm regexFull regexMulti regexSingle termStream | + | patternMulti patternSingle regexFull regexMulti regexSingle termStream | (self usesRegex not and: [self hasComplexTerm]) ifFalse: [^ {self term}]. self term withBlanksTrimmed ifEmpty: [^ {self term}]. @@ -12,13 +12,16 @@ partTerms self flag: #regexFamily. patternSingle := '[^\s"]+'. patternMulti := '"([^"\\]|\\["\\])+"'. - regexSingle := patternSingle asRegex. - regexMulti := patternMulti asRegex. - patternTerm := '(?%<=^|\s)(<1s>|<2s>)(?=\s|$)' + regexSingle := [patternSingle asRegex] once. + regexMulti := [patternMulti asRegex] once. + regexFull := + [| patternTerm patternFull | + patternTerm := '(?%<=^|\s)(<1s>|<2s>)(?=\s|$)' expandMacrosWith: patternSingle with: patternMulti. - patternFull := '^(\s*(<1s>)\s*)*$' expandMacrosWith: patternTerm. - regexFull := patternFull asRegex. + patternFull := '^(\s*(<1s>)\s*)*$' expandMacrosWith: patternTerm. + patternFull asRegex] + once. termStream := self term readStream. (regexFull matchesStream: termStream) ifFalse: [ diff --git a/packages/SqueakInboxTalk.package/TalkTextSearcher.class/methodProperties.json b/packages/SqueakInboxTalk.package/TalkTextSearcher.class/methodProperties.json index 65d930d..9d7d710 100644 --- a/packages/SqueakInboxTalk.package/TalkTextSearcher.class/methodProperties.json +++ b/packages/SqueakInboxTalk.package/TalkTextSearcher.class/methodProperties.json @@ -17,6 +17,7 @@ "doMatchWildcard" : "ct 12/4/2021 23:45", "doUseRegex" : "ct 6/17/2021 15:26", "errors" : "ct 6/18/2021 00:33", + "filter:" : "ct 6/19/2024 18:10", "fullyMatches:" : "ct 7/24/2021 00:12", "fuzzyStrategies" : "ct 7/24/2021 00:57", "hasComplexTerm" : "ct 7/24/2021 00:47", @@ -51,7 +52,7 @@ "numberOfSurroundingLines" : "ct 7/24/2021 00:52", "numberOfSurroundingLines:" : "ct 7/24/2021 00:52", "partSearchers" : "ct 7/24/2021 00:59", - "partTerms" : "ct 7/24/2021 01:00", + "partTerms" : "ct 6/7/2024 17:02", "postCopy" : "ct 6/11/2021 18:11", "regexIntervalsIn:" : "ct 7/22/2021 19:40", "removeRedundantLabelsFromMatches:" : "ct 1/5/2024 16:25",