Skip to content

Commit

Permalink
support words before and after the FailurePart
Browse files (browse the repository at this point in the history)
  • Loading branch information
rkm committed Aug 7, 2023
1 parent a86f02a commit 8ceef12
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 10 deletions.
5 changes: 3 additions & 2 deletions IsIdentifiable/Reporting/Reports/FailureStoreReport.cs
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ public static IEnumerable<Failure> Deserialize(IFileInfo oldFile, Action<int> lo
token.ThrowIfCancellationRequested();
lineNumber++;
var problemField = r["ProblemField"];
var problemValue = r["ProblemValue"];
var words = r["PartWords"].Split(Separator);
var classes = r["PartClassifications"].Split(Separator);
var offsets = r["PartOffsets"].Split(Separator);
Expand All @@ -172,7 +173,7 @@ public static IEnumerable<Failure> Deserialize(IFileInfo oldFile, Action<int> lo
if (!string.IsNullOrWhiteSpace(partRule.IfColumn) && !string.Equals(partRule.IfColumn, problemField, StringComparison.InvariantCultureIgnoreCase))
continue;

foreach (var part in parts.Where(x => partRule.Covers(x)))
foreach (var part in parts.Where(x => partRule.Covers(x, problemValue)))
toRemove.Add(part);
}

Check notice: Code scanning / CodeQL

Note: Missed opportunity to use Where

This foreach loop implicitly filters its target sequence - consider filtering the sequence explicitly using '.Where(...)'.
parts = parts.Except(toRemove);
Expand All @@ -184,7 +185,7 @@ public static IEnumerable<Failure> Deserialize(IFileInfo oldFile, Action<int> lo
Resource = r["Resource"],
ResourcePrimaryKey = r["ResourcePrimaryKey"],
ProblemField = problemField,
ProblemValue = r["ProblemValue"],
ProblemValue = problemValue,
};

if (lineNumber % 1000 == 0)
Expand Down
50 changes: 42 additions & 8 deletions IsIdentifiable/Rules/RegexRule_PartTemp.cs
Original file line number Diff line number Diff line change
Expand Up @@ -41,24 +41,58 @@ public override bool CaseSensitive
}
}

public string WordBefore { get; set; }

public string WordAfter { get; set; }

// TODO(rkm 2023-07-25) Shouldn't be needed when IfPattern is readonly
private void RebuildPartRegex()
{
if (!_ifPartPatternString.StartsWith("^") || _ifPartPatternString.EndsWith("$"))
if (_ifPartPatternString == null)
throw new Exception("Illegal rule setup. You must specify IfPartPattern");

if (!_ifPartPatternString.StartsWith("^") || !_ifPartPatternString.EndsWith("$"))
throw new ArgumentException("IfPartPattern must be enclosed by ^ and $");
IfPartPatternRegex = _ifPartPatternString == null ? null : new Regex(_ifPartPatternString, (CaseSensitive ? RegexOptions.None : RegexOptions.IgnoreCase) | RegexOptions.Compiled);

IfPartPatternRegex = new Regex(_ifPartPatternString, (CaseSensitive ? RegexOptions.None : RegexOptions.IgnoreCase) | RegexOptions.Compiled);
}

public bool Covers(FailurePart failurePart)
public bool Covers(FailurePart failurePart, string problemValue)
{
if (IfPartPattern == null)
throw new Exception("Illegal rule setup. You must specify IfPartPattern");

if (As != failurePart.Classification)
return false;

var matches = IfPartPatternRegex.Matches(failurePart.Word);
return matches.Any();
bool matchesBefore = false;
if (!string.IsNullOrWhiteSpace(WordBefore))
{
var problemValueUpToOffset = problemValue[..(failurePart.Offset + failurePart.Word.Length)];
if (!problemValueUpToOffset.EndsWith(failurePart.Word))
throw new Exception("Invlaid data: actual word and word at offset did not match");

var wordBeforeRegex = new Regex($"\\b{WordBefore}\\s+{IfPartPattern.TrimStart('^')}", (CaseSensitive ? RegexOptions.None : RegexOptions.IgnoreCase));
matchesBefore = wordBeforeRegex.Matches(problemValueUpToOffset).Any();
}

bool matchesAfter = false;
if (!string.IsNullOrWhiteSpace(WordAfter))
{
var problemValueFromOffset = problemValue[failurePart.Offset..];
if (!problemValueFromOffset.StartsWith(failurePart.Word))
throw new Exception("Invlaid data: actual word and word at offset did not match");

var wordAfterRegex = new Regex($"{IfPartPattern.TrimEnd('$')}\\s+{WordAfter}\\b", (CaseSensitive ? RegexOptions.None : RegexOptions.IgnoreCase));
matchesAfter = wordAfterRegex.Matches(problemValueFromOffset).Any();
}

if (
matchesBefore && string.IsNullOrWhiteSpace(WordAfter) ||
matchesAfter && string.IsNullOrWhiteSpace(WordBefore) ||
(matchesBefore && matchesAfter)

Check notice: Code scanning / CodeQL

Note: Complex condition

Complex condition: too many logical operations in this expression.
)
{
return true;
}

return IfPartPatternRegex.Matches(failurePart.Word).Any();
}
}

0 comments on commit 8ceef12

Please sign in to comment.