Skip to content

Commit

Permalink
Warning fixups, pre-compile regexes
Browse files Browse the repository at this point in the history
  • Loading branch information
jas88 committed Aug 12, 2024
1 parent babff8b commit ceb42c8
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 25 deletions.
7 changes: 5 additions & 2 deletions IsIdentifiable/Reporting/Destinations/ReportDestination.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ namespace IsIdentifiable.Reporting.Destinations;
/// in a derrived class allows persistence of IsIdentifiable reports (e.g. to a
/// CSV file or database table).
/// </summary>
public abstract class ReportDestination : IReportDestination
public abstract partial class ReportDestination : IReportDestination
{
/// <summary>
/// The options used to run IsIdentifiable
Expand All @@ -23,7 +23,7 @@ public abstract class ReportDestination : IReportDestination
/// </summary>
protected readonly IFileSystem FileSystem;

private readonly Regex _multiSpaceRegex = new(" {2,}");
private readonly Regex _multiSpaceRegex = MultispaceRegex();

/// <summary>
/// Initializes the report destination and sets <see cref="Options"/>
Expand Down Expand Up @@ -70,4 +70,7 @@ protected object StripWhitespace(object o)

return o;
}

[GeneratedRegex(" {2,}", RegexOptions.CultureInvariant)]
private static partial Regex MultispaceRegex();
}
49 changes: 33 additions & 16 deletions IsIdentifiable/Runners/IsIdentifiableAbstractRunner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ namespace IsIdentifiable.Runners;
/// Subclass to add support for new data sources. Current sources include reading from
/// CSV files, Dicom files and database tables.
/// </summary>
public abstract class IsIdentifiableAbstractRunner : IDisposable
public abstract partial class IsIdentifiableAbstractRunner : IDisposable
{
private readonly ILogger _logger = LogManager.GetCurrentClassLogger();

Expand All @@ -45,26 +45,25 @@ public abstract class IsIdentifiableAbstractRunner : IDisposable

// DDMMYY + 4 digits
// \b bounded i.e. not more than 10 digits
private readonly Regex _chiRegex = new(@"\b[0-3][0-9][0-1][0-9][0-9]{6}\b");
readonly Regex _postcodeRegex = new(@"\b((GIR 0AA)|((([A-Z-[QVX]][0-9][0-9]?)|(([A-Z-[QVX]][A-Z-[IJZ]][0-9][0-9]?)|(([A-Z-[QVX]][0-9][A-HJKSTUW])|([A-Z-[QVX]][A-Z-[IJZ]][0-9][ABEHMNPRVWXY]))))\s?[0-9][A-Z-[CIKMOV]]{2}))\b", RegexOptions.IgnoreCase);
private readonly Regex _chiRegex = ChiRegex();
readonly Regex _postcodeRegex = PostcodeRegex();

/// <summary>
/// Matches a 'symbol' (digit followed by an optional th, rd or separator) then a month name (e.g. Jan or January)
/// </summary>
readonly Regex _symbolThenMonth = new(@"\d+((th)|(rd)|(st)|[\-/\\])?\s?((Jan(uary)?)|(Feb(ruary)?)|(Mar(ch)?)|(Apr(il)?)|(May)|(June?)|(July?)|(Aug(ust)?)|(Sep(tember)?)|(Oct(ober)?)|(Nov(ember)?)|(Dec(ember)?))", RegexOptions.IgnoreCase);
readonly Regex _symbolThenMonth = SymbolMonthRegex();

/// <summary>
/// Matches a month name (e.g. Jan or January) followed by a 'symbol' (digit followed by an optional th, rd or separator) then a
/// </summary>
readonly Regex _monthThenSymbol = new(@"((Jan(uary)?)|(Feb(ruary)?)|(Mar(ch)?)|(Apr(il)?)|(May)|(June?)|(July?)|(Aug(ust)?)|(Sep(tember)?)|(Oct(ober)?)|(Nov(ember)?)|(Dec(ember)?))[\s\-/\\]?\d+((th)|(rd)|(st))?", RegexOptions.IgnoreCase);
readonly Regex _monthThenSymbol = MonthSymbolRegex();

/// <summary>
/// Matches digits followed by a separator (: - \ etc) followed by more digits with optional AM / PM / GMT at the end
/// However this looks more like a time than a date and I would argue that times are not PII?
/// It's also not restrictive enough so matches too many non-PII numerics.
/// </summary>
readonly Regex _date = new(
@"\b\d+([:\-/\\]\d+)+\s?((AM)|(PM)|(GMT))?\b", RegexOptions.IgnoreCase);
readonly Regex _date = DateRegex();

// The following regex were adapted from:
// https://www.oreilly.com/library/view/regular-expressions-cookbook/9781449327453/ch04s04.html
Expand All @@ -73,21 +72,15 @@ public abstract class IsIdentifiableAbstractRunner : IDisposable
/// <summary>
/// Matches year last, i.e d/m/y or m/d/y
/// </summary>
readonly Regex _dateYearLast = new(
@"\b(?:(1[0-2]|0?[1-9])[ ]?[/-][ ]?(3[01]|[12][0-9]|0?[1-9])|(3[01]|[12][0-9]|0?[1-9])[ ]?[/-][ ]?(1[0-2]|0?[1-9]))[ ]?[/-][ ]?(?:[0-9]{2})?[0-9]{2}(\b|T)" // year last
);
readonly Regex _dateYearLast = DateYearLastRegex();
/// <summary>
/// Matches year first, i.e y/m/d or y/d/m
/// </summary>
readonly Regex _dateYearFirst = new(
@"\b(?:[0-9]{2})?[0-9]{2}[ ]?[/-][ ]?(?:(1[0-2]|0?[1-9])[ ]?[/-][ ]?(3[01]|[12][0-9]|0?[1-9])|(3[01]|[12][0-9]|0?[1-9])[ ]?[/-][ ]?(1[0-2]|0?[1-9]))(\b|T)" // year first
);
readonly Regex _dateYearFirst = DateYearFirstRegex();
/// <summary>
/// Matches year missing, i.e d/m or m/d
/// </summary>
readonly Regex _dateYearMissing = new(
@"\b(?:(1[0-2]|0?[1-9])[ ]?[/-][ ]?(3[01]|[12][0-9]|0?[1-9])|(3[01]|[12][0-9]|0?[1-9])[ ]?[/-][ ]?(1[0-2]|0?[1-9]))(\b|T)" // year missing
);
readonly Regex _dateYearMissing = DateYearMissingRegex();


/// <summary>
Expand Down Expand Up @@ -606,4 +599,28 @@ public virtual void Dispose()
_logger?.Info($"ValidateCacheHits:{ValidateCacheHits} Total ValidateCacheMisses:{ValidateCacheMisses}");
_logger?.Info($"Total FailurePart identified: {CountOfFailureParts}");
}

[GeneratedRegex(@"\b[0-3][0-9][0-1][0-9][0-9]{6}\b", RegexOptions.CultureInvariant)]
private static partial Regex ChiRegex();

[GeneratedRegex(@"\b((GIR 0AA)|((([A-Z-[QVX]][0-9][0-9]?)|(([A-Z-[QVX]][A-Z-[IJZ]][0-9][0-9]?)|(([A-Z-[QVX]][0-9][A-HJKSTUW])|([A-Z-[QVX]][A-Z-[IJZ]][0-9][ABEHMNPRVWXY]))))\s?[0-9][A-Z-[CIKMOV]]{2}))\b", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)]
private static partial Regex PostcodeRegex();

[GeneratedRegex(@"\d+((th)|(rd)|(st)|[\-/\\])?\s?((Jan(uary)?)|(Feb(ruary)?)|(Mar(ch)?)|(Apr(il)?)|(May)|(June?)|(July?)|(Aug(ust)?)|(Sep(tember)?)|(Oct(ober)?)|(Nov(ember)?)|(Dec(ember)?))", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)]
private static partial Regex SymbolMonthRegex();

[GeneratedRegex(@"\b\d+([:\-/\\]\d+)+\s?((AM)|(PM)|(GMT))?\b", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)]
private static partial Regex DateRegex();

[GeneratedRegex(@"((Jan(uary)?)|(Feb(ruary)?)|(Mar(ch)?)|(Apr(il)?)|(May)|(June?)|(July?)|(Aug(ust)?)|(Sep(tember)?)|(Oct(ober)?)|(Nov(ember)?)|(Dec(ember)?))[\s\-/\\]?\d+((th)|(rd)|(st))?", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)]
private static partial Regex MonthSymbolRegex();

[GeneratedRegex(@"\b(?:(1[0-2]|0?[1-9])[ ]?[/-][ ]?(3[01]|[12][0-9]|0?[1-9])|(3[01]|[12][0-9]|0?[1-9])[ ]?[/-][ ]?(1[0-2]|0?[1-9]))[ ]?[/-][ ]?(?:[0-9]{2})?[0-9]{2}(\b|T)", RegexOptions.CultureInvariant)]
private static partial Regex DateYearLastRegex();

[GeneratedRegex(@"\b(?:[0-9]{2})?[0-9]{2}[ ]?[/-][ ]?(?:(1[0-2]|0?[1-9])[ ]?[/-][ ]?(3[01]|[12][0-9]|0?[1-9])|(3[01]|[12][0-9]|0?[1-9])[ ]?[/-][ ]?(1[0-2]|0?[1-9]))(\b|T)", RegexOptions.CultureInvariant)]
private static partial Regex DateYearFirstRegex();

[GeneratedRegex(@"\b(?:(1[0-2]|0?[1-9])[ ]?[/-][ ]?(3[01]|[12][0-9]|0?[1-9])|(3[01]|[12][0-9]|0?[1-9])[ ]?[/-][ ]?(1[0-2]|0?[1-9]))(\b|T)", RegexOptions.CultureInvariant)]
private static partial Regex DateYearMissingRegex();
}
13 changes: 9 additions & 4 deletions Tests/IsIdentifiableTests/RunnerTests/DicomFileRunnerTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -192,14 +192,12 @@ public void MissingPixelDataWhenValidationSkippedShouldThrow(string modality)

// Act

var call = () => runner.ValidateDicomFile(fileInfo);

// Assert

switch (modality)
{
case "CT":
var exc = Assert.Throws<ApplicationException>(() => call());
var exc = Assert.Throws<ApplicationException>(ValidateCallback);
Assert.Multiple(() =>
{
Assert.That(exc!.Message, Is.EqualTo("Could not create DicomImage for file with SOPClassUID 'CT Image Storage [1.2.840.10008.5.1.4.1.1.2]'"));
Expand All @@ -209,7 +207,7 @@ public void MissingPixelDataWhenValidationSkippedShouldThrow(string modality)
break;

case "SR":
Assert.DoesNotThrow(() => call());
Assert.DoesNotThrow(ValidateCallback);
Assert.Multiple(() =>
{
Assert.That(runner.FilesValidated, Is.EqualTo(1));
Expand All @@ -220,6 +218,13 @@ public void MissingPixelDataWhenValidationSkippedShouldThrow(string modality)
default:
throw new Exception($"No case for {modality}");
}

return;

void ValidateCallback()
{
runner.ValidateDicomFile(fileInfo);
}
}

#endregion
Expand Down
4 changes: 1 addition & 3 deletions ii/IsIdentifiableFileGlobOptions.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using CommandLine;
using CommandLine;
using IsIdentifiable.Options;

namespace ii;
Expand All @@ -11,9 +11,7 @@ namespace ii;
internal class IsIdentifiableFileGlobOptions : IsIdentifiableFileOptions
{
// [Option('f', HelpText = "Path to a file or directory to be evaluated", Required = true)]
#pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable.
// public new string FilePath { get; set; }
#pragma warning restore CS8618

[Option('g', HelpText = "Pattern to use for matching files when -f is a directory. Supports specifying a glob e.g. /**/*.csv", Required = false, Default = "*.csv")]
public string Glob { get; set; } = "*.csv";
Expand Down

0 comments on commit ceb42c8

Please sign in to comment.