From 492b2d8fe3dd0cfcfba5b4cb6a792fcc89f4b28e Mon Sep 17 00:00:00 2001 From: Ruairidh MacLeod Date: Tue, 8 Aug 2023 12:01:37 +0100 Subject: [PATCH] pull-up offset fixing code to cover all parts --- .../Reporting/Reports/FailureStoreReport.cs | 34 +++++++++++++------ IsIdentifiable/Rules/RegexRule_PartTemp.cs | 23 ------------- 2 files changed, 24 insertions(+), 33 deletions(-) diff --git a/IsIdentifiable/Reporting/Reports/FailureStoreReport.cs b/IsIdentifiable/Reporting/Reports/FailureStoreReport.cs index 8db94c8b..3a3de389 100644 --- a/IsIdentifiable/Reporting/Reports/FailureStoreReport.cs +++ b/IsIdentifiable/Reporting/Reports/FailureStoreReport.cs @@ -153,7 +153,7 @@ public static IEnumerable Deserialize(IFileInfo oldFile, Action lo token.ThrowIfCancellationRequested(); lineNumber++; var problemField = r["ProblemField"]; - var problemValue = r["ProblemValue"]; + var problemValue = r["ProblemValue"] ?? throw new Exception("ProblemValue was null"); var words = r["PartWords"].Split(Separator); var classes = r["PartClassifications"].Split(Separator); var offsets = r["PartOffsets"].Split(Separator); @@ -166,6 +166,27 @@ public static IEnumerable Deserialize(IFileInfo oldFile, Action lo ) ); + // Fixes any offsets that have been mangled by file endings etc. + foreach (var part in parts) + { + if (problemValue.Substring(part.Offset, part.Word.Length) == part.Word) + continue; + + // Try looking ahead first, then back + var origOffset = part.Offset; + try + { + while (problemValue.Substring(part.Offset, part.Word.Length) != part.Word) + part.Offset++; + } + catch (ArgumentOutOfRangeException) + { + part.Offset = origOffset; + while (problemValue.Substring(part.Offset, part.Word.Length) != part.Word) + part.Offset--; + } + } + /* TEMP - Filter out any FailureParts covered by an PartRegexRule_Temp */ var toRemove = new List(); foreach (var partRule in partRules) @@ -173,15 +194,8 @@ public static IEnumerable Deserialize(IFileInfo oldFile, Action lo if (!string.IsNullOrWhiteSpace(partRule.IfColumn) && !string.Equals(partRule.IfColumn, problemField, StringComparison.InvariantCultureIgnoreCase)) continue; - foreach (var part in parts) - { - var origOffset = part.Offset; - if (partRule.Covers(part, problemValue)) - { - part.Offset = origOffset; - toRemove.Add(part); - } - } + foreach (var part in parts.Where(x => partRule.Covers(x, problemValue))) + toRemove.Add(part); } parts = parts.Except(toRemove); /* TEMP */ diff --git a/IsIdentifiable/Rules/RegexRule_PartTemp.cs b/IsIdentifiable/Rules/RegexRule_PartTemp.cs index 70228b4c..b507c52d 100644 --- a/IsIdentifiable/Rules/RegexRule_PartTemp.cs +++ b/IsIdentifiable/Rules/RegexRule_PartTemp.cs @@ -57,34 +57,11 @@ private void RebuildPartRegex() IfPartPatternRegex = new Regex(_ifPartPatternString, (CaseSensitive ? RegexOptions.None : RegexOptions.IgnoreCase) | RegexOptions.Compiled); } - private static void FixupOffset(FailurePart failurePart, string problemValue) - { - if (problemValue.Substring(failurePart.Offset, failurePart.Word.Length) == failurePart.Word) - return; - - // Try looking ahead first, then back - var origOffset = failurePart.Offset; - try - { - while (problemValue.Substring(failurePart.Offset, failurePart.Word.Length) != failurePart.Word) - failurePart.Offset++; - } - catch (ArgumentOutOfRangeException) - { - failurePart.Offset = origOffset; - while (problemValue.Substring(failurePart.Offset, failurePart.Word.Length) != failurePart.Word) - failurePart.Offset--; - } - } - public bool Covers(FailurePart failurePart, string problemValue) { if (As != failurePart.Classification) return false; - // Fixes any offsets that have been mangled by file endings etc. - FixupOffset(failurePart, problemValue); - bool matchesBefore = false; if (!string.IsNullOrWhiteSpace(WordBefore)) {