From 8ceef12972cfd5a060ebc3252294860b5cdd2e42 Mon Sep 17 00:00:00 2001 From: Ruairidh MacLeod Date: Mon, 7 Aug 2023 16:16:58 +0100 Subject: [PATCH] support words before and after the FailurePart --- .../Reporting/Reports/FailureStoreReport.cs | 5 +- IsIdentifiable/Rules/RegexRule_PartTemp.cs | 50 ++++++++++++++++--- 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/IsIdentifiable/Reporting/Reports/FailureStoreReport.cs b/IsIdentifiable/Reporting/Reports/FailureStoreReport.cs index 448b2511..ed2ff617 100644 --- a/IsIdentifiable/Reporting/Reports/FailureStoreReport.cs +++ b/IsIdentifiable/Reporting/Reports/FailureStoreReport.cs @@ -153,6 +153,7 @@ public static IEnumerable Deserialize(IFileInfo oldFile, Action lo token.ThrowIfCancellationRequested(); lineNumber++; var problemField = r["ProblemField"]; + var problemValue = r["ProblemValue"]; var words = r["PartWords"].Split(Separator); var classes = r["PartClassifications"].Split(Separator); var offsets = r["PartOffsets"].Split(Separator); @@ -172,7 +173,7 @@ public static IEnumerable Deserialize(IFileInfo oldFile, Action lo if (!string.IsNullOrWhiteSpace(partRule.IfColumn) && !string.Equals(partRule.IfColumn, problemField, StringComparison.InvariantCultureIgnoreCase)) continue; - foreach (var part in parts.Where(x => partRule.Covers(x))) + foreach (var part in parts.Where(x => partRule.Covers(x, problemValue))) toRemove.Add(part); } parts = parts.Except(toRemove); @@ -184,7 +185,7 @@ public static IEnumerable Deserialize(IFileInfo oldFile, Action lo Resource = r["Resource"], ResourcePrimaryKey = r["ResourcePrimaryKey"], ProblemField = problemField, - ProblemValue = r["ProblemValue"], + ProblemValue = problemValue, }; if (lineNumber % 1000 == 0) diff --git a/IsIdentifiable/Rules/RegexRule_PartTemp.cs b/IsIdentifiable/Rules/RegexRule_PartTemp.cs index c0ed579c..14684301 100644 --- a/IsIdentifiable/Rules/RegexRule_PartTemp.cs +++ b/IsIdentifiable/Rules/RegexRule_PartTemp.cs @@ -41,24 +41,58 @@ public override bool CaseSensitive } } + public string WordBefore { get; set; } + + public string WordAfter { get; set; } + // TODO(rkm 2023-07-25) Shouldn't be needed when IfPattern is readonly private void RebuildPartRegex() { - if (!_ifPartPatternString.StartsWith("^") || _ifPartPatternString.EndsWith("$")) + if (_ifPartPatternString == null) + throw new Exception("Illegal rule setup. You must specify IfPartPattern"); + if (!_ifPartPatternString.StartsWith("^") || !_ifPartPatternString.EndsWith("$")) throw new ArgumentException("IfPartPattern must be enclosed by ^ and $"); - IfPartPatternRegex = _ifPartPatternString == null ? null : new Regex(_ifPartPatternString, (CaseSensitive ? RegexOptions.None : RegexOptions.IgnoreCase) | RegexOptions.Compiled); + + IfPartPatternRegex = new Regex(_ifPartPatternString, (CaseSensitive ? RegexOptions.None : RegexOptions.IgnoreCase) | RegexOptions.Compiled); } - public bool Covers(FailurePart failurePart) + public bool Covers(FailurePart failurePart, string problemValue) { - if (IfPartPattern == null) - throw new Exception("Illegal rule setup. You must specify IfPartPattern"); - if (As != failurePart.Classification) return false; - var matches = IfPartPatternRegex.Matches(failurePart.Word); - return matches.Any(); + bool matchesBefore = false; + if (!string.IsNullOrWhiteSpace(WordBefore)) + { + var problemValueUpToOffset = problemValue[..(failurePart.Offset + failurePart.Word.Length)]; + if (!problemValueUpToOffset.EndsWith(failurePart.Word)) + throw new Exception("Invlaid data: actual word and word at offset did not match"); + + var wordBeforeRegex = new Regex($"\\b{WordBefore}\\s+{IfPartPattern.TrimStart('^')}", (CaseSensitive ? RegexOptions.None : RegexOptions.IgnoreCase)); + matchesBefore = wordBeforeRegex.Matches(problemValueUpToOffset).Any(); + } + + bool matchesAfter = false; + if (!string.IsNullOrWhiteSpace(WordAfter)) + { + var problemValueFromOffset = problemValue[failurePart.Offset..]; + if (!problemValueFromOffset.StartsWith(failurePart.Word)) + throw new Exception("Invlaid data: actual word and word at offset did not match"); + + var wordAfterRegex = new Regex($"{IfPartPattern.TrimEnd('$')}\\s+{WordAfter}\\b", (CaseSensitive ? RegexOptions.None : RegexOptions.IgnoreCase)); + matchesAfter = wordAfterRegex.Matches(problemValueFromOffset).Any(); + } + + if ( + matchesBefore && string.IsNullOrWhiteSpace(WordAfter) || + matchesAfter && string.IsNullOrWhiteSpace(WordBefore) || + (matchesBefore && matchesAfter) + ) + { + return true; + } + + return IfPartPatternRegex.Matches(failurePart.Word).Any(); } }