Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize Memory Usage and Add Cancellation Support in TesseractOcrEngine and ServiceConfiguration #1202

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 19 additions & 9 deletions shared/Ocr/Tesseract/TesseractOcrEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ public sealed class TesseractOcrEngine : IOcrEngine, IDisposable
private readonly TesseractEngine _engine;

/// <summary>
/// Creates a new instance of the TesseractOcrEngine using the provided Tesseract configuration.
/// </summary>
public TesseractOcrEngine(TesseractConfig tesseractConfig)
{
Expand All @@ -28,15 +28,25 @@ public TesseractOcrEngine(TesseractConfig tesseractConfig)
///<inheritdoc/>
public async Task<string> ExtractTextFromImageAsync(Stream imageContent, CancellationToken cancellationToken = default)
{
    // Pix.LoadFromMemory takes a byte array, so the whole image has to be
    // buffered in memory first. CopyToAsync's default copy buffer is used;
    // passing an explicit buffer size would not reduce the peak memory needed.
    await using var imgStream = new MemoryStream();
    await imageContent.CopyToAsync(imgStream, cancellationToken).ConfigureAwait(false);

    // The OCR calls below are synchronous and do not take a token, so this is
    // the last point where a pending cancellation can be honored.
    // OperationCanceledException is deliberately NOT caught here: returning an
    // empty string would make a cancelled request indistinguishable from an
    // image that contains no text.
    cancellationToken.ThrowIfCancellationRequested();

    // ToArray() copies the written bytes regardless of the stream position,
    // so no rewind is required before loading the image.
    using var img = Pix.LoadFromMemory(imgStream.ToArray());
    using var page = this._engine.Process(img);

    return page.GetText();
}

Expand Down
33 changes: 19 additions & 14 deletions shared/ServiceConfiguration.cs
Original file line number Diff line number Diff line change
Expand Up @@ -496,23 +496,28 @@ private void SetupForOpenAI()
/// <param name="builder">KM builder</param>
/// <param name="addCustomService">Action used to configure the service collection</param>
/// <typeparam name="T">Target type/interface</typeparam>
private T GetServiceInstance<T>(IKernelMemoryBuilder builder, Action<IServiceCollection> addCustomService)
{
    // Work on a private copy of the registrations so the builder's own
    // collection is never modified. Existing descriptors for T are skipped so
    // that the one added by `addCustomService` is the only candidate.
    IServiceCollection services = new ServiceCollection();
    foreach (ServiceDescriptor descriptor in builder.Services)
    {
        if (descriptor.ServiceType == typeof(T)) { continue; }

        services.Add(descriptor);
    }

    // Register the custom T descriptor BEFORE building the provider:
    // descriptors added to a collection after BuildServiceProvider() are not
    // seen by the already-built provider, and IServiceCollection itself is not
    // a resolvable service.
    addCustomService.Invoke(services);

    // Build and return an instance of T, as defined by `addCustomService`.
    // typeof(T).Name reports the actual requested type; nameof(T) would
    // always produce the literal string "T".
    return services.BuildServiceProvider().GetService<T>()
           ?? throw new ConfigurationException($"Unable to build {typeof(T).Name}");
}

/// <summary>
Expand Down