Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make sure all assertion rules run at least once #1586

Merged
merged 28 commits into from
Nov 15, 2024
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
326fda0
Bubble up exceptions, add context, and let test fail
basiliskus Nov 1, 2024
353c71f
Throw exception when no matching files found and simplify logic
basiliskus Nov 1, 2024
f292122
Fixed failing test
basiliskus Nov 4, 2024
1b16688
Added HapiHL7FileMatcherException custom exception and fixed tests
basiliskus Nov 4, 2024
694e447
Added javadocs
basiliskus Nov 4, 2024
a81a097
WIP: Added AzureBlobOrganizer to reorganize blobs by dated folders
basiliskus Nov 11, 2024
670fcdc
Merge branch 'main' into story/1523/org-azure-container
basiliskus Nov 11, 2024
ade9cda
Added cleanup logic, fixed storage paths and misc cleanup
basiliskus Nov 12, 2024
7fd09f3
Refactored AzureBlobOrganizer to better handle cooying and deleting b…
basiliskus Nov 12, 2024
2c00ae0
Removed non existing metadata and improved how we're checking the blo…
basiliskus Nov 13, 2024
1cb3dbf
Renaming and cleanup
basiliskus Nov 13, 2024
640dd21
Extracted time zone to use the same one everywhere
basiliskus Nov 13, 2024
05847e7
Moved methods to helper class
basiliskus Nov 13, 2024
6a025fa
Changed azure fetch logic to get blobs in expected path for todays date
basiliskus Nov 13, 2024
4a86f7f
Added tests for AzureBlobHelper
basiliskus Nov 13, 2024
1710fae
Renamed sourcePath => sourceName
basiliskus Nov 13, 2024
551d929
Added javadocs
basiliskus Nov 14, 2024
c8ac0a1
Added assertion to ensure all rules ran
basiliskus Nov 14, 2024
0f6b873
Added test coverage
basiliskus Nov 14, 2024
06cb089
Removed UTC timezone comment. Planning to keep using UTC
basiliskus Nov 15, 2024
572529d
Added comment explaining date path formatting
basiliskus Nov 15, 2024
2403364
Merge branch 'story/1523/org-azure-container' into story/1525/assert-…
basiliskus Nov 15, 2024
c7521b7
Added note about blob retention policy
basiliskus Nov 15, 2024
01c870d
Merge branch 'story/1523/org-azure-container' into story/1525/assert-…
basiliskus Nov 15, 2024
cf3b285
Added more context on file org
basiliskus Nov 15, 2024
bf1f723
Merge branch 'story/1523/org-azure-container' into story/1525/assert-…
basiliskus Nov 15, 2024
5530fed
Merge branch 'main' into story/1525/assert-all-assertions-run
basiliskus Nov 15, 2024
fdc062a
Renamed and added comment for clarity
basiliskus Nov 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,30 @@
import com.azure.storage.blob.BlobContainerClient;
import com.azure.storage.blob.BlobContainerClientBuilder;
import com.azure.storage.blob.models.BlobItem;
import com.azure.storage.blob.models.BlobProperties;
import com.azure.storage.blob.models.ListBlobsOptions;
import java.time.LocalDate;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.util.ArrayList;
import java.util.List;

/**
* The AzureBlobFileFetcher class implements the {@link FileFetcher FileFetcher} interface and
* fetches files from an Azure Blob Storage container.
* retrieves files from an Azure Blob Storage container.
*/
public class AzureBlobFileFetcher implements FileFetcher {

private static final FileFetcher INSTANCE = new AzureBlobFileFetcher();
// We're using UTC for now, but we plan to change the timezone to be more realistic to the
// working timezones in our teams
private static final ZoneId TIME_ZONE = ZoneOffset.UTC;
private static final int RETENTION_DAYS = 90;
private static final String CONTAINER_NAME = "automated";

private final BlobContainerClient blobContainerClient;

private static final FileFetcher INSTANCE = new AzureBlobFileFetcher();

private AzureBlobFileFetcher() {
String azureStorageConnectionName = "automated";
String azureStorageConnectionString = System.getenv("AZURE_STORAGE_CONNECTION_STRING");

if (azureStorageConnectionString == null || azureStorageConnectionString.isEmpty()) {
Expand All @@ -31,8 +37,11 @@ private AzureBlobFileFetcher() {
this.blobContainerClient =
new BlobContainerClientBuilder()
.connectionString(azureStorageConnectionString)
.containerName(azureStorageConnectionName)
.containerName(CONTAINER_NAME)
.buildClient();

AzureBlobOrganizer blobOrganizer = new AzureBlobOrganizer(blobContainerClient);
blobOrganizer.organizeAndCleanupBlobsByDate(RETENTION_DAYS, TIME_ZONE);
}

public static FileFetcher getInstance() {
Expand All @@ -41,33 +50,17 @@ public static FileFetcher getInstance() {

@Override
public List<HL7FileStream> fetchFiles() {
List<HL7FileStream> recentFiles = new ArrayList<>();
LocalDate mostRecentDay = null;
List<HL7FileStream> relevantFiles = new ArrayList<>();

for (BlobItem blobItem : blobContainerClient.listBlobs()) {
LocalDate today = LocalDate.now(TIME_ZONE);
String pathPrefix = AzureBlobHelper.buildDatePathPrefix(today);
ListBlobsOptions options = new ListBlobsOptions().setPrefix(pathPrefix);
for (BlobItem blobItem : blobContainerClient.listBlobs(options, null)) {
BlobClient blobClient = blobContainerClient.getBlobClient(blobItem.getName());
BlobProperties properties = blobClient.getProperties();

// Currently we're doing everything in UTC. If we start uploading files manually and
// running
// this test manually, we may want to revisit this logic and/or the file structure
// because midnight UTC is 5pm PDT on the previous day
LocalDate blobCreationDate =
properties.getLastModified().toInstant().atZone(ZoneOffset.UTC).toLocalDate();

if (mostRecentDay != null && blobCreationDate.isBefore(mostRecentDay)) {
continue;
}

if (mostRecentDay == null || blobCreationDate.isAfter(mostRecentDay)) {
mostRecentDay = blobCreationDate;
recentFiles.clear();
}

recentFiles.add(
relevantFiles.add(
new HL7FileStream(blobClient.getBlobName(), blobClient.openInputStream()));
}

return recentFiles;
return relevantFiles;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package gov.hhs.cdc.trustedintermediary.rse2e;

import java.time.LocalDate;

/* The AzureBlobHelper is a utility class that provides helper methods for working with Azure Blob Storage. */
public class AzureBlobHelper {

private AzureBlobHelper() {}

public static String buildDatePathPrefix(LocalDate date) {
return String.format(
"%d/%02d/%02d/", date.getYear(), date.getMonthValue(), date.getDayOfMonth());
}

public static String createDateBasedPath(LocalDate date, String originalName) {
return buildDatePathPrefix(date) + originalName;
}

public static boolean isInDateFolder(String blobPath, LocalDate creationDate) {
String expectedPath = buildDatePathPrefix(creationDate);
return blobPath.startsWith(expectedPath);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package gov.hhs.cdc.trustedintermediary.rse2e;

import com.azure.storage.blob.BlobClient;
import com.azure.storage.blob.BlobContainerClient;
import com.azure.storage.blob.models.BlobItem;
import com.azure.storage.blob.models.BlobProperties;
import com.azure.storage.blob.models.CopyStatusType;
import gov.hhs.cdc.trustedintermediary.context.ApplicationContext;
import gov.hhs.cdc.trustedintermediary.wrappers.Logger;
import java.time.Duration;
import java.time.LocalDate;
import java.time.ZoneId;

/* AzureBlobOrganizer is responsible for organizing and cleaning up blobs in an Azure container */
public class AzureBlobOrganizer {

private final BlobContainerClient blobContainerClient;

protected final Logger logger = ApplicationContext.getImplementation(Logger.class);

public AzureBlobOrganizer(BlobContainerClient blobContainerClient) {
this.blobContainerClient = blobContainerClient;
}

public void organizeAndCleanupBlobsByDate(int retentionDays, ZoneId timeZone) {
for (BlobItem blobItem : blobContainerClient.listBlobs()) {
String sourceName = blobItem.getName();
try {
BlobClient sourceBlob = blobContainerClient.getBlobClient(sourceName);
BlobProperties sourceProperties = sourceBlob.getProperties();
LocalDate sourceCreationDate =
sourceProperties
.getCreationTime()
.toInstant()
.atZone(timeZone)
.toLocalDate();

LocalDate retentionDate = LocalDate.now(timeZone).minusDays(retentionDays);
if (sourceCreationDate.isBefore(retentionDate)) {
sourceBlob.delete();
logger.logInfo("Deleted old blob: {}", sourceName);
continue;
}

if (AzureBlobHelper.isInDateFolder(sourceName, sourceCreationDate)) {
continue;
}

String destinationName =
AzureBlobHelper.createDateBasedPath(sourceCreationDate, sourceName);
BlobClient destinationBlob = blobContainerClient.getBlobClient(destinationName);
destinationBlob
.beginCopy(sourceBlob.getBlobUrl(), null)
.waitForCompletion(Duration.ofSeconds(30));

CopyStatusType copyStatus = destinationBlob.getProperties().getCopyStatus();
if (copyStatus == CopyStatusType.SUCCESS) {
sourceBlob.delete();
logger.logInfo("Moved blob {} to {}", sourceName, destinationName);
} else {
destinationBlob.delete();
logger.logError("Failed to copy blob: " + sourceName);
}
} catch (Exception e) {
logger.logError("Error processing blob: " + sourceName, e);
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ public class LocalFileFetcher implements FileFetcher {

private static final String FILES_PATH = "../examples/Test/Automated/";
private static final String EXTENSION = "hl7";

private static final FileFetcher INSTANCE = new LocalFileFetcher();

private LocalFileFetcher() {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import javax.inject.Inject;

/**
Expand Down Expand Up @@ -58,21 +60,28 @@ public void ensureRulesLoaded() throws RuleLoaderException {
}
}

public void runRules(Message outputMessage, Message inputMessage) {
public List<AssertionRule> getRules() {
return assertionRules;
}

public Set<AssertionRule> runRules(Message outputMessage, Message inputMessage) {
try {
ensureRulesLoaded();
} catch (RuleLoaderException e) {
logger.logError("Failed to load rules definitions", e);
return;
return Set.of();
basiliskus marked this conversation as resolved.
Show resolved Hide resolved
}

HapiHL7Message outputHapiMessage = new HapiHL7Message(outputMessage);
HapiHL7Message inputHapiMessage = new HapiHL7Message(inputMessage);

Set<AssertionRule> runRules = new HashSet<>();
for (AssertionRule rule : assertionRules) {
if (rule.shouldRun(outputHapiMessage)) {
rule.runRule(outputHapiMessage, inputHapiMessage);
runRules.add(rule);
}
}
return runRules;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,13 @@ import spock.lang.Specification

class AutomatedTest extends Specification {

List<HL7FileStream> recentAzureFiles
List<HL7FileStream> recentLocalFiles
List<HL7FileStream> azureFiles
List<HL7FileStream> localFiles
AssertionRuleEngine engine
HapiHL7FileMatcher fileMatcher
def mockLogger = Mock(Logger)
Logger mockLogger = Mock(Logger)

def setup() {
FileFetcher azureFileFetcher = AzureBlobFileFetcher.getInstance()
recentAzureFiles = azureFileFetcher.fetchFiles()

FileFetcher localFileFetcher = LocalFileFetcher.getInstance()
recentLocalFiles = localFileFetcher.fetchFiles()

engine = AssertionRuleEngine.getInstance()
fileMatcher = HapiHL7FileMatcher.getInstance()

Expand All @@ -38,29 +32,39 @@ class AutomatedTest extends Specification {
TestApplicationContext.register(Formatter, Jackson.getInstance())
TestApplicationContext.register(HapiHL7FileMatcher, fileMatcher)
TestApplicationContext.register(HealthDataExpressionEvaluator, HapiHL7ExpressionEvaluator.getInstance())
TestApplicationContext.register(AzureBlobFileFetcher, azureFileFetcher)
TestApplicationContext.register(LocalFileFetcher, LocalFileFetcher.getInstance())
TestApplicationContext.injectRegisteredImplementations()

FileFetcher azureFileFetcher = AzureBlobFileFetcher.getInstance()
azureFiles = azureFileFetcher.fetchFiles()

FileFetcher localFileFetcher = LocalFileFetcher.getInstance()
localFiles = localFileFetcher.fetchFiles()

engine.ensureRulesLoaded()
}

def cleanup() {
for (HL7FileStream fileStream : recentLocalFiles + recentAzureFiles) {
for (HL7FileStream fileStream : localFiles + azureFiles) {
fileStream.inputStream().close()
}
}

def "test defined assertions on relevant messages"() {
given:
def matchedFiles = fileMatcher.matchFiles(recentAzureFiles, recentLocalFiles)
def toRunRules = engine.getRules()
def matchedFiles = fileMatcher.matchFiles(azureFiles, localFiles)

when:
for (messagePair in matchedFiles) {
Message inputMessage = messagePair.getKey() as Message
Message outputMessage = messagePair.getValue() as Message
engine.runRules(outputMessage, inputMessage)
def runRules = engine.runRules(outputMessage, inputMessage)
toRunRules.removeAll(runRules)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like the naming here might be confusing to come back to in the future since the run in runRules can either be a verb (like 'go run the rules') or an adjective ('run rules are rules that already ran'). Maybe they could be evaluatedRules and rulesToEvaluate, or rulesThatRan and rulesToRun, or some other pair that's slightly more distinct from the verb-based method names?

It might also be worth adding a small comment here, like //Check whether all the rules in the assertions file have been run

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, I'll change the name

}

then:
toRunRules.collect { it.name }.isEmpty()
0 * mockLogger.logError(_ as String, _ as Exception)
0 * mockLogger.logWarning(_ as String)
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package gov.hhs.cdc.trustedintermediary.rse2e

import spock.lang.Specification

import java.time.LocalDate

class AzureBlobHelperTest extends Specification {

def "buildDatePathPrefix should create correct path format"() {
given:
def date = LocalDate.of(2024, 3, 15)

when:
def result = AzureBlobHelper.buildDatePathPrefix(date)

then:
result == "2024/03/15/"
}

def "createDateBasedPath should combine date prefix with filename"() {
given:
def date = LocalDate.of(2024, 3, 15)
def fileName = "test.hl7"

when:
def result = AzureBlobHelper.createDateBasedPath(date, fileName)

then:
result == "2024/03/15/test.hl7"
}

def "isInDateFolder should return true for matching date folder"() {
given:
def date = LocalDate.of(2024, 3, 15)
def path = "2024/03/15/test.hl7"

expect:
AzureBlobHelper.isInDateFolder(path, date)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class AssertionRuleEngineTest extends Specification {

then:
1 * mockRuleLoader.loadRules(_ as InputStream, _ as TypeReference) >> [mockRule]
ruleEngine.assertionRules.size() == 1
ruleEngine.getRules().size() == 1
}

def "ensureRulesLoaded loads rules only once by default"() {
Expand All @@ -50,7 +50,7 @@ class AssertionRuleEngineTest extends Specification {

then:
1 * mockRuleLoader.loadRules(_ as InputStream, _ as TypeReference) >> [mockRule]
ruleEngine.assertionRules.size() == 1
ruleEngine.rules.size() == 1
}

def "ensureRulesLoaded loads rules only once on multiple threads"() {
Expand Down