-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added job to export Workforce Data to GCS (#1490)
- Loading branch information
Showing
13 changed files
with
361 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
11 changes: 11 additions & 0 deletions
11
TeachingRecordSystem/src/TeachingRecordSystem.Core/Jobs/ExportWorkforceDataJob.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
using TeachingRecordSystem.Core.Services.WorkforceData; | ||
|
||
namespace TeachingRecordSystem.Core.Jobs; | ||
|
||
/// <summary>
/// Job entry point that runs the workforce data export by delegating to
/// <see cref="WorkforceDataExporter"/>.
/// </summary>
public class ExportWorkforceDataJob(WorkforceDataExporter workforceDataExporter)
{
    /// <summary>
    /// Runs the export and completes when <see cref="WorkforceDataExporter.Export"/> completes.
    /// </summary>
    /// <param name="cancellationToken">Token passed through to the exporter.</param>
    public async Task ExecuteAsync(CancellationToken cancellationToken) =>
        await workforceDataExporter.Export(cancellationToken);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
8 changes: 8 additions & 0 deletions
8
...tem/src/TeachingRecordSystem.Core/Services/WorkforceData/Google/IStorageClientProvider.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
using Google.Cloud.Storage.V1; | ||
|
||
namespace TeachingRecordSystem.Core.Services.WorkforceData.Google; | ||
|
||
/// <summary>
/// Abstraction over how a Google Cloud Storage <see cref="StorageClient"/> is obtained.
/// </summary>
public interface IStorageClientProvider
{
    /// <summary>
    /// Gets the <see cref="StorageClient"/> to use.
    /// </summary>
    /// <returns>A <see cref="ValueTask{TResult}"/> that yields the client.</returns>
    ValueTask<StorageClient> GetStorageClientAsync();
}
23 changes: 23 additions & 0 deletions
23
...c/TeachingRecordSystem.Core/Services/WorkforceData/Google/OptionsStorageClientProvider.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
using Google.Cloud.Storage.V1; | ||
using Microsoft.Extensions.Options; | ||
|
||
namespace TeachingRecordSystem.Core.Services.WorkforceData.Google; | ||
|
||
/// <summary>
/// An <see cref="IStorageClientProvider"/> that returns the <see cref="StorageClient"/> held on
/// <see cref="WorkforceDataExportOptions"/>.
/// </summary>
public class OptionsStorageClientProvider : IStorageClientProvider
{
    private readonly IOptions<WorkforceDataExportOptions> _optionsAccessor;

    /// <summary>
    /// Creates the provider.
    /// </summary>
    /// <param name="optionsAccessor">Accessor for the export options; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="optionsAccessor"/> is null.</exception>
    public OptionsStorageClientProvider(IOptions<WorkforceDataExportOptions> optionsAccessor)
    {
        _optionsAccessor = optionsAccessor ?? throw new ArgumentNullException(nameof(optionsAccessor));
    }

    /// <summary>
    /// Returns the client stored on the options as an already-completed <see cref="ValueTask{TResult}"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">The options carry no <see cref="StorageClient"/>.</exception>
    public ValueTask<StorageClient> GetStorageClientAsync()
    {
        if (_optionsAccessor.Value.StorageClient is not { } storageClient)
        {
            throw new InvalidOperationException($"No {nameof(WorkforceDataExportOptions.StorageClient)} has been configured.");
        }

        return new ValueTask<StorageClient>(storageClient);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
38 changes: 38 additions & 0 deletions
38
...c/TeachingRecordSystem.Core/Services/WorkforceData/WorkforceDataExportConfigureOptions.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
using System.Text.Json; | ||
using Google.Apis.Auth.OAuth2; | ||
using Google.Cloud.Storage.V1; | ||
using Microsoft.Extensions.Configuration; | ||
using Microsoft.Extensions.Options; | ||
|
||
namespace TeachingRecordSystem.Core.Services.WorkforceData; | ||
|
||
/// <summary>
/// Populates <see cref="WorkforceDataExportOptions"/> from the <c>WorkforceDataExport</c>
/// configuration section.
/// </summary>
internal class WorkforceDataExportConfigureOptions : IConfigureOptions<WorkforceDataExportOptions>
{
    private readonly IConfiguration _configuration;

    /// <summary>
    /// Creates the configurator.
    /// </summary>
    /// <param name="configuration">Configuration root to read the section from; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="configuration"/> is null.</exception>
    public WorkforceDataExportConfigureOptions(IConfiguration configuration)
    {
        // Fail at construction rather than with a NullReferenceException inside Configure.
        ArgumentNullException.ThrowIfNull(configuration);
        _configuration = configuration;
    }

    /// <summary>
    /// Copies <c>BucketName</c> onto <paramref name="options"/> and, when <c>CredentialsJson</c>
    /// is present and contains a <c>private_key</c> property, creates a
    /// <see cref="StorageClient"/> from those credentials.
    /// </summary>
    /// <param name="options">The options instance to populate; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is null.</exception>
    public void Configure(WorkforceDataExportOptions options)
    {
        ArgumentNullException.ThrowIfNull(options);

        var section = _configuration.GetSection("WorkforceDataExport");

        // May still be null here; WorkforceDataExportOptions.ValidateOptions() reports a missing value later.
        options.BucketName = section["BucketName"]!;

        var credentialsJson = section["CredentialsJson"];
        if (string.IsNullOrEmpty(credentialsJson))
        {
            return;
        }

        // JsonDocument is IDisposable; dispose it once the probe for "private_key" is done.
        using var credentialsJsonDoc = JsonDocument.Parse(credentialsJson);

        // Credentials without a private key are ignored and StorageClient is left as it was.
        if (credentialsJsonDoc.RootElement.TryGetProperty("private_key", out _))
        {
            var creds = GoogleCredential.FromJson(credentialsJson);
            options.StorageClient = StorageClient.Create(creds);
        }
    }
}
25 changes: 25 additions & 0 deletions
25
...ordSystem/src/TeachingRecordSystem.Core/Services/WorkforceData/WorkforceDataExportItem.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
namespace TeachingRecordSystem.Core.Services.WorkforceData; | ||
|
||
/// <summary>
/// One row of the workforce data export. <c>WorkforceDataExporter</c> materialises these from its
/// SQL query and serialises them to Parquet. The table names in the comments below refer to the
/// tables joined by that query.
/// </summary>
/// <remarks>
/// Property order is kept as declared; do not reorder without checking the serialised output.
/// </remarks>
public record WorkforceDataExportItem
{
    // Employment and person identifiers (tps_employments / persons).
    public required Guid TpsEmploymentId { get; init; }
    public required Guid PersonId { get; init; }
    public required string Trn { get; init; }

    // Establishment details (establishments / establishment_sources).
    public required Guid EstablishmentId { get; init; }
    public required string EstablishmentSource { get; init; }
    public required int? EstablishmentUrn { get; init; }
    public required string LocalAuthorityCode { get; init; }
    public required string? EstablishmentNumber { get; init; }
    public required string EstablishmentName { get; init; }

    // Employment period.
    public required DateOnly StartDate { get; init; }
    public required DateOnly? EndDate { get; init; }
    public required DateOnly LastKnownTpsEmployedDate { get; init; }

    /// <summary>
    /// Short employment type code; the exporter's query emits <c>FT</c>, <c>PTR</c>, <c>PTI</c> or <c>PT</c>.
    /// </summary>
    public required string EmploymentType { get; init; }

    public required bool WithdrawalConfirmed { get; init; }
    public required DateOnly LastExtractDate { get; init; }
    public required string Key { get; init; }

    // Personal data carried on the employment row.
    public required string NationalInsuranceNumber { get; init; }
    public required string? PersonPostcode { get; init; }

    // Row audit timestamps.
    public required DateTime CreatedOn { get; init; }
    public required DateTime UpdatedOn { get; init; }
}
20 changes: 20 additions & 0 deletions
20
...System/src/TeachingRecordSystem.Core/Services/WorkforceData/WorkforceDataExportOptions.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
using System.Diagnostics.CodeAnalysis; | ||
using Google.Cloud.Storage.V1; | ||
|
||
namespace TeachingRecordSystem.Core.Services.WorkforceData; | ||
|
||
/// <summary>
/// Settings for the workforce data export: the storage client to upload with and the
/// destination bucket.
/// </summary>
public class WorkforceDataExportOptions
{
    /// <summary>
    /// The client used for uploads, or null when none has been configured.
    /// </summary>
    public StorageClient? StorageClient { get; set; }

    /// <summary>
    /// Name of the destination bucket. Null until configured; callers must not assign null.
    /// </summary>
    [DisallowNull]
    public string? BucketName { get; set; }

    /// <summary>
    /// Ensures <see cref="BucketName"/> has been set, so callers can use it as non-null afterwards.
    /// </summary>
    /// <exception cref="InvalidOperationException"><see cref="BucketName"/> is null.</exception>
    [MemberNotNull(nameof(BucketName))]
    internal void ValidateOptions()
    {
        if (BucketName is not null)
        {
            return;
        }

        throw new InvalidOperationException($"{nameof(BucketName)} has not been configured.");
    }
}
100 changes: 100 additions & 0 deletions
100
...ecordSystem/src/TeachingRecordSystem.Core/Services/WorkforceData/WorkforceDataExporter.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
using Microsoft.Extensions.Options; | ||
using Parquet.Serialization; | ||
using Parquet.Utils; | ||
using TeachingRecordSystem.Core.DataStore.Postgres; | ||
using TeachingRecordSystem.Core.Services.WorkforceData.Google; | ||
|
||
namespace TeachingRecordSystem.Core.Services.WorkforceData; | ||
|
||
/// <summary>
/// Exports TPS employment rows (joined to person and establishment data) to a single Parquet
/// file and uploads it to the configured storage bucket.
/// </summary>
public class WorkforceDataExporter(
    IClock clock,
    IDbContextFactory<TrsDbContext> dbContextFactory,
    IOptions<WorkforceDataExportOptions> optionsAccessor,
    IStorageClientProvider storageClientProvider)
{
    // Command timeout applied to the export query, in seconds.
    private const int CommandTimeoutSeconds = 300;

    // Number of rows buffered in memory before they are flushed to an intermediate Parquet file.
    private const int BatchSize = 50000;

    // Timestamp format shared by the temp directory, the intermediate files and the uploaded object.
    private const string FileDateTimeFormat = "yyyyMMddHHmm";

    /// <summary>
    /// Runs the export: streams the query results into intermediate Parquet files of up to
    /// <see cref="BatchSize"/> rows in a temp directory, merges them into one file and uploads it
    /// as <c>workforce_data_{timestamp}.parquet</c>.
    /// </summary>
    /// <param name="cancellationToken">Observed while reading rows and while uploading.</param>
    /// <exception cref="InvalidOperationException">
    /// No bucket name or storage client has been configured.
    /// </exception>
    public async Task Export(CancellationToken cancellationToken)
    {
        // Fail fast on missing configuration instead of after the whole export has been produced.
        optionsAccessor.Value.ValidateOptions();

        await using var dbContext = await dbContextFactory.CreateDbContextAsync(cancellationToken);
        dbContext.Database.SetCommandTimeout(CommandTimeoutSeconds);

        FormattableString querySql =
            $"""
            SELECT
                t.tps_employment_id,
                t.person_id,
                p.trn,
                e.establishment_id,
                s.name establishment_source,
                e.urn establishment_urn,
                e.la_code local_authority_code,
                e.establishment_number,
                e.establishment_name,
                t.start_date,
                t.end_date,
                t.last_known_tps_employed_date,
                CASE
                    WHEN t.employment_type = 0 THEN 'FT'
                    WHEN t.employment_type = 1 THEN 'PTR'
                    WHEN t.employment_type = 2 THEN 'PTI'
                    WHEN t.employment_type = 3 THEN 'PT'
                END employment_type,
                t.withdrawal_confirmed,
                t.last_extract_date,
                t.key,
                t.national_insurance_number,
                t.person_postcode,
                t.created_on,
                t.updated_on
            FROM
                tps_employments t
            JOIN
                persons p ON p.person_id = t.person_id
            JOIN
                establishments e ON e.establishment_id = t.establishment_id
            JOIN
                establishment_sources s ON s.establishment_source_id = e.establishment_source_id
            """;

        // Captured once and formatted culture-invariantly so every file name in this run agrees,
        // regardless of the host culture or of the clock ticking over during the export.
        var fileDateTime = clock.UtcNow.ToString(FileDateTimeFormat, System.Globalization.CultureInfo.InvariantCulture);
        var tempDirectory = Path.Combine(Path.GetTempPath(), $"workforce_data_{fileDateTime}");
        Directory.CreateDirectory(tempDirectory);

        try
        {
            var fileNumber = 0;
            var itemsToExport = new List<WorkforceDataExportItem>(BatchSize);
            var rows = dbContext.Database.SqlQuery<WorkforceDataExportItem>(querySql).AsAsyncEnumerable();

            await foreach (var item in rows.WithCancellation(cancellationToken))
            {
                itemsToExport.Add(item);

                // The buffer is cleared after every flush, so a full buffer marks each BatchSize-th row.
                if (itemsToExport.Count == BatchSize)
                {
                    await WriteBatchAsync(itemsToExport, tempDirectory, fileDateTime, ++fileNumber);
                }
            }

            // Flush the final, partially filled batch.
            if (itemsToExport.Count > 0)
            {
                await WriteBatchAsync(itemsToExport, tempDirectory, fileDateTime, ++fileNumber);
            }

            using var stream = new MemoryStream();
            var merger = new FileMerger(new DirectoryInfo(tempDirectory));
            await merger.MergeFilesAsync(stream);

            // Rewind so the upload starts from the beginning of the merged file rather than
            // depending on how the uploader treats the stream's current position.
            stream.Position = 0;

            await UploadFile(stream, $"workforce_data_{fileDateTime}.parquet", cancellationToken);
        }
        finally
        {
            // The intermediate files contain personal data; remove them on failure and
            // cancellation too, so they are neither left on disk nor merged into a later run.
            if (Directory.Exists(tempDirectory))
            {
                Directory.Delete(tempDirectory, true);
            }
        }
    }

    // Writes the buffered rows to the next numbered intermediate Parquet file and empties the buffer.
    private static async Task WriteBatchAsync(
        List<WorkforceDataExportItem> batch,
        string directory,
        string fileDateTime,
        int fileNumber)
    {
        var filePath = Path.Combine(directory, $"workforce_data_{fileDateTime}_{fileNumber}.parquet");
        await ParquetSerializer.SerializeAsync(batch, filePath);
        batch.Clear();
    }

    // Uploads the stream to the configured bucket under the given object name.
    private async Task UploadFile(Stream stream, string fileName, CancellationToken cancellationToken)
    {
        var storageClient = await storageClientProvider.GetStorageClientAsync();
        var options = optionsAccessor.Value;
        options.ValidateOptions();

        await storageClient.UploadObjectAsync(options.BucketName, fileName, null, stream, cancellationToken: cancellationToken);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.