From ceb42c8defe954db88a4f8251b3cca177b57a55b Mon Sep 17 00:00:00 2001 From: James A Sutherland Date: Mon, 12 Aug 2024 09:03:11 -0500 Subject: [PATCH] Warning fixups, pre-compile regexes --- .../Destinations/ReportDestination.cs | 7 ++- .../Runners/IsIdentifiableAbstractRunner.cs | 49 +++++++++++++------ .../RunnerTests/DicomFileRunnerTest.cs | 13 +++-- ii/IsIdentifiableFileGlobOptions.cs | 4 +- 4 files changed, 48 insertions(+), 25 deletions(-) diff --git a/IsIdentifiable/Reporting/Destinations/ReportDestination.cs b/IsIdentifiable/Reporting/Destinations/ReportDestination.cs index c27be22c..0a6d57ea 100644 --- a/IsIdentifiable/Reporting/Destinations/ReportDestination.cs +++ b/IsIdentifiable/Reporting/Destinations/ReportDestination.cs @@ -11,7 +11,7 @@ namespace IsIdentifiable.Reporting.Destinations; /// in a derrived class allows persistence of IsIdentifiable reports (e.g. to a /// CSV file or database table). /// -public abstract class ReportDestination : IReportDestination +public abstract partial class ReportDestination : IReportDestination { /// /// The options used to run IsIdentifiable @@ -23,7 +23,7 @@ public abstract class ReportDestination : IReportDestination /// protected readonly IFileSystem FileSystem; - private readonly Regex _multiSpaceRegex = new(" {2,}"); + private readonly Regex _multiSpaceRegex = MultispaceRegex(); /// /// Initializes the report destination and sets @@ -70,4 +70,7 @@ protected object StripWhitespace(object o) return o; } + + [GeneratedRegex(" {2,}", RegexOptions.CultureInvariant)] + private static partial Regex MultispaceRegex(); } diff --git a/IsIdentifiable/Runners/IsIdentifiableAbstractRunner.cs b/IsIdentifiable/Runners/IsIdentifiableAbstractRunner.cs index fd4218e6..9e31b0ea 100644 --- a/IsIdentifiable/Runners/IsIdentifiableAbstractRunner.cs +++ b/IsIdentifiable/Runners/IsIdentifiableAbstractRunner.cs @@ -25,7 +25,7 @@ namespace IsIdentifiable.Runners; /// Subclass to add support for new data sources. Current sources include reading from /// CSV files, Dicom files and database tables. /// -public abstract class IsIdentifiableAbstractRunner : IDisposable +public abstract partial class IsIdentifiableAbstractRunner : IDisposable { private readonly ILogger _logger = LogManager.GetCurrentClassLogger(); @@ -45,26 +45,25 @@ public abstract class IsIdentifiableAbstractRunner : IDisposable // DDMMYY + 4 digits // \b bounded i.e. not more than 10 digits - private readonly Regex _chiRegex = new(@"\b[0-3][0-9][0-1][0-9][0-9]{6}\b"); - readonly Regex _postcodeRegex = new(@"\b((GIR 0AA)|((([A-Z-[QVX]][0-9][0-9]?)|(([A-Z-[QVX]][A-Z-[IJZ]][0-9][0-9]?)|(([A-Z-[QVX]][0-9][A-HJKSTUW])|([A-Z-[QVX]][A-Z-[IJZ]][0-9][ABEHMNPRVWXY]))))\s?[0-9][A-Z-[CIKMOV]]{2}))\b", RegexOptions.IgnoreCase); + private readonly Regex _chiRegex = ChiRegex(); + readonly Regex _postcodeRegex = PostcodeRegex(); /// /// Matches a 'symbol' (digit followed by an optional th, rd or separator) then a month name (e.g. Jan or January) /// - readonly Regex _symbolThenMonth = new(@"\d+((th)|(rd)|(st)|[\-/\\])?\s?((Jan(uary)?)|(Feb(ruary)?)|(Mar(ch)?)|(Apr(il)?)|(May)|(June?)|(July?)|(Aug(ust)?)|(Sep(tember)?)|(Oct(ober)?)|(Nov(ember)?)|(Dec(ember)?))", RegexOptions.IgnoreCase); + readonly Regex _symbolThenMonth = SymbolMonthRegex(); /// /// Matches a month name (e.g. Jan or January) followed by a 'symbol' (digit followed by an optional th, rd or separator) then a /// - readonly Regex _monthThenSymbol = new(@"((Jan(uary)?)|(Feb(ruary)?)|(Mar(ch)?)|(Apr(il)?)|(May)|(June?)|(July?)|(Aug(ust)?)|(Sep(tember)?)|(Oct(ober)?)|(Nov(ember)?)|(Dec(ember)?))[\s\-/\\]?\d+((th)|(rd)|(st))?", RegexOptions.IgnoreCase); + readonly Regex _monthThenSymbol = MonthSymbolRegex(); /// /// Matches digits followed by a separator (: - \ etc) followed by more digits with optional AM / PM / GMT at the end /// However this looks more like a time than a date and I would argue that times are not PII? /// It's also not restrictive enough so matches too many non-PII numerics. /// - readonly Regex _date = new( - @"\b\d+([:\-/\\]\d+)+\s?((AM)|(PM)|(GMT))?\b", RegexOptions.IgnoreCase); + readonly Regex _date = DateRegex(); // The following regex were adapted from: // https://www.oreilly.com/library/view/regular-expressions-cookbook/9781449327453/ch04s04.html @@ -73,21 +72,15 @@ public abstract class IsIdentifiableAbstractRunner : IDisposable /// /// Matches year last, i.e d/m/y or m/d/y /// - readonly Regex _dateYearLast = new( - @"\b(?:(1[0-2]|0?[1-9])[ ]?[/-][ ]?(3[01]|[12][0-9]|0?[1-9])|(3[01]|[12][0-9]|0?[1-9])[ ]?[/-][ ]?(1[0-2]|0?[1-9]))[ ]?[/-][ ]?(?:[0-9]{2})?[0-9]{2}(\b|T)" // year last - ); + readonly Regex _dateYearLast = DateYearLastRegex(); /// /// Matches year first, i.e y/m/d or y/d/m /// - readonly Regex _dateYearFirst = new( - @"\b(?:[0-9]{2})?[0-9]{2}[ ]?[/-][ ]?(?:(1[0-2]|0?[1-9])[ ]?[/-][ ]?(3[01]|[12][0-9]|0?[1-9])|(3[01]|[12][0-9]|0?[1-9])[ ]?[/-][ ]?(1[0-2]|0?[1-9]))(\b|T)" // year first - ); + readonly Regex _dateYearFirst = DateYearFirstRegex(); /// /// Matches year missing, i.e d/m or m/d /// - readonly Regex _dateYearMissing = new( - @"\b(?:(1[0-2]|0?[1-9])[ ]?[/-][ ]?(3[01]|[12][0-9]|0?[1-9])|(3[01]|[12][0-9]|0?[1-9])[ ]?[/-][ ]?(1[0-2]|0?[1-9]))(\b|T)" // year missing - ); + readonly Regex _dateYearMissing = DateYearMissingRegex(); /// @@ -606,4 +599,28 @@ public virtual void Dispose() _logger?.Info($"ValidateCacheHits:{ValidateCacheHits} Total ValidateCacheMisses:{ValidateCacheMisses}"); _logger?.Info($"Total FailurePart identified: {CountOfFailureParts}"); } + + [GeneratedRegex(@"\b[0-3][0-9][0-1][0-9][0-9]{6}\b", RegexOptions.CultureInvariant)] + private static partial Regex ChiRegex(); + + [GeneratedRegex(@"\b((GIR 0AA)|((([A-Z-[QVX]][0-9][0-9]?)|(([A-Z-[QVX]][A-Z-[IJZ]][0-9][0-9]?)|(([A-Z-[QVX]][0-9][A-HJKSTUW])|([A-Z-[QVX]][A-Z-[IJZ]][0-9][ABEHMNPRVWXY]))))\s?[0-9][A-Z-[CIKMOV]]{2}))\b", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)] + private static partial Regex PostcodeRegex(); + + [GeneratedRegex(@"\d+((th)|(rd)|(st)|[\-/\\])?\s?((Jan(uary)?)|(Feb(ruary)?)|(Mar(ch)?)|(Apr(il)?)|(May)|(June?)|(July?)|(Aug(ust)?)|(Sep(tember)?)|(Oct(ober)?)|(Nov(ember)?)|(Dec(ember)?))", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)] + private static partial Regex SymbolMonthRegex(); + + [GeneratedRegex(@"\b\d+([:\-/\\]\d+)+\s?((AM)|(PM)|(GMT))?\b", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)] + private static partial Regex DateRegex(); + + [GeneratedRegex(@"((Jan(uary)?)|(Feb(ruary)?)|(Mar(ch)?)|(Apr(il)?)|(May)|(June?)|(July?)|(Aug(ust)?)|(Sep(tember)?)|(Oct(ober)?)|(Nov(ember)?)|(Dec(ember)?))[\s\-/\\]?\d+((th)|(rd)|(st))?", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)] + private static partial Regex MonthSymbolRegex(); + + [GeneratedRegex(@"\b(?:(1[0-2]|0?[1-9])[ ]?[/-][ ]?(3[01]|[12][0-9]|0?[1-9])|(3[01]|[12][0-9]|0?[1-9])[ ]?[/-][ ]?(1[0-2]|0?[1-9]))[ ]?[/-][ ]?(?:[0-9]{2})?[0-9]{2}(\b|T)", RegexOptions.CultureInvariant)] + private static partial Regex DateYearLastRegex(); + + [GeneratedRegex(@"\b(?:[0-9]{2})?[0-9]{2}[ ]?[/-][ ]?(?:(1[0-2]|0?[1-9])[ ]?[/-][ ]?(3[01]|[12][0-9]|0?[1-9])|(3[01]|[12][0-9]|0?[1-9])[ ]?[/-][ ]?(1[0-2]|0?[1-9]))(\b|T)", RegexOptions.CultureInvariant)] + private static partial Regex DateYearFirstRegex(); + + [GeneratedRegex(@"\b(?:(1[0-2]|0?[1-9])[ ]?[/-][ ]?(3[01]|[12][0-9]|0?[1-9])|(3[01]|[12][0-9]|0?[1-9])[ ]?[/-][ ]?(1[0-2]|0?[1-9]))(\b|T)", RegexOptions.CultureInvariant)] + private static partial Regex DateYearMissingRegex(); } diff --git a/Tests/IsIdentifiableTests/RunnerTests/DicomFileRunnerTest.cs b/Tests/IsIdentifiableTests/RunnerTests/DicomFileRunnerTest.cs index 3e1c55b3..cdd152e2 100644 --- a/Tests/IsIdentifiableTests/RunnerTests/DicomFileRunnerTest.cs +++ b/Tests/IsIdentifiableTests/RunnerTests/DicomFileRunnerTest.cs @@ -192,14 +192,12 @@ public void MissingPixelDataWhenValidationSkippedShouldThrow(string modality) // Act - var call = () => runner.ValidateDicomFile(fileInfo); - // Assert switch (modality) { case "CT": - var exc = Assert.Throws(() => call()); + var exc = Assert.Throws(ValidateCallback); Assert.Multiple(() => { Assert.That(exc!.Message, Is.EqualTo("Could not create DicomImage for file with SOPClassUID 'CT Image Storage [1.2.840.10008.5.1.4.1.1.2]'")); @@ -209,7 +207,7 @@ public void MissingPixelDataWhenValidationSkippedShouldThrow(string modality) break; case "SR": - Assert.DoesNotThrow(() => call()); + Assert.DoesNotThrow(ValidateCallback); Assert.Multiple(() => { Assert.That(runner.FilesValidated, Is.EqualTo(1)); @@ -220,6 +218,13 @@ public void MissingPixelDataWhenValidationSkippedShouldThrow(string modality) default: throw new Exception($"No case for {modality}"); } + + return; + + void ValidateCallback() + { + runner.ValidateDicomFile(fileInfo); + } } #endregion diff --git a/ii/IsIdentifiableFileGlobOptions.cs b/ii/IsIdentifiableFileGlobOptions.cs index 25d82cd2..93a19367 100644 --- a/ii/IsIdentifiableFileGlobOptions.cs +++ b/ii/IsIdentifiableFileGlobOptions.cs @@ -1,4 +1,4 @@ -using CommandLine; +using CommandLine; using IsIdentifiable.Options; namespace ii; @@ -11,9 +11,7 @@ namespace ii; internal class IsIdentifiableFileGlobOptions : IsIdentifiableFileOptions { // [Option('f', HelpText = "Path to a file or directory to be evaluated", Required = true)] -#pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. // public new string FilePath { get; set; } -#pragma warning restore CS8618 [Option('g', HelpText = "Pattern to use for matching files when -f is a directory. Supports specifying a glob e.g. /**/*.csv", Required = false, Default = "*.csv")] public string Glob { get; set; } = "*.csv";