From fd9112dcdf287bb274840102e1a778b5a0ce837f Mon Sep 17 00:00:00 2001 From: Hazem Ali Date: Wed, 6 Nov 2024 10:33:59 +0200 Subject: [PATCH 1/3] Optimized GetServiceInstance This fix minimizes memory usage by avoiding the duplication of services and using scoped resolution, which is ideal for transient, one-time configurations. Additionally, this approach leverages DI lifecycle management to ensure services are automatically cleaned up. --- shared/ServiceConfiguration.cs | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/shared/ServiceConfiguration.cs b/shared/ServiceConfiguration.cs index 5d189d40d..29b7d1d79 100644 --- a/shared/ServiceConfiguration.cs +++ b/shared/ServiceConfiguration.cs @@ -468,23 +468,28 @@ private void SetupForOpenAI() /// KM builder /// Action used to configure the service collection /// Target type/interface - private T GetServiceInstance(IKernelMemoryBuilder builder, Action addCustomService) + private T GetServiceInstance(IKernelMemoryBuilder builder, Action addCustomService) where T : class { - // Clone the list of service descriptors, skipping T descriptor - IServiceCollection services = new ServiceCollection(); - foreach (ServiceDescriptor d in builder.Services) + // Temporarily register the service with a scoped lifecycle to ensure it’s not duplicated or retained in memory unnecessarily + var serviceProvider = builder.Services.BuildServiceProvider(); + + // Add the custom service configuration directly in a new scope + using (var scope = serviceProvider.CreateScope()) { - if (d.ServiceType == typeof(T)) { continue; } - - services.Add(d); + // Apply custom service configuration within the scope + addCustomService(scope.ServiceProvider.GetRequiredService()); + + // Attempt to resolve the service + T instance = scope.ServiceProvider.GetService(); + + // Check if the instance was successfully created + if (instance == null) + { + throw new ConfigurationException($"Unable to build 
{typeof(T).Name}"); + } + + return instance; } - - // Add the custom T descriptor - addCustomService.Invoke(services); - - // Build and return an instance of T, as defined by `addCustomService` - return services.BuildServiceProvider().GetService() - ?? throw new ConfigurationException($"Unable to build {nameof(T)}"); } /// From 8d07edd20b2615752a8fb8f9ab2cbff79187269a Mon Sep 17 00:00:00 2001 From: Hazem Ali Date: Wed, 6 Nov 2024 10:37:17 +0200 Subject: [PATCH 2/3] Reduce memory usage during CopyToAsync Reduce memory usage during CopyToAsync --- shared/Ocr/Tesseract/TesseractOcrEngine.cs | 38 +++++++++++++++++----- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/shared/Ocr/Tesseract/TesseractOcrEngine.cs b/shared/Ocr/Tesseract/TesseractOcrEngine.cs index 75881c90c..796bfe60c 100644 --- a/shared/Ocr/Tesseract/TesseractOcrEngine.cs +++ b/shared/Ocr/Tesseract/TesseractOcrEngine.cs @@ -1,5 +1,5 @@ // Copyright (c) Microsoft. All rights reserved. - +using System; using System.IO; using System.Threading; using System.Threading.Tasks; @@ -11,30 +11,50 @@ namespace CopilotChat.Shared.Ocr.Tesseract; /// /// Wrapper for the TesseractEngine within the Tesseract OCR library. /// -public class TesseractOcrEngine : IOcrEngine +public class TesseractOcrEngine : IOcrEngine, IDisposable { private readonly TesseractEngine _engine; /// - /// Creates a new instance of the TesseractEngineWrapper passing in a valid TesseractEngine. + /// Creates a new instance of the TesseractOcrEngine passing in a valid TesseractEngine. 
/// public TesseractOcrEngine(TesseractOptions tesseractOptions) { + // Initialize TesseractEngine with provided options this._engine = new TesseractEngine(tesseractOptions.FilePath, tesseractOptions.Language); } /// public async Task ExtractTextFromImageAsync(Stream imageContent, CancellationToken cancellationToken = default) { - await using (var imgStream = new MemoryStream()) + try { - await imageContent.CopyToAsync(imgStream); - imgStream.Position = 0; + // Use a buffer for CopyToAsync to reduce memory usage for large images + await using (var imgStream = new MemoryStream()) + { + await imageContent.CopyToAsync(imgStream, 81920, cancellationToken); // Buffered copy with 80 KB buffer size + imgStream.Position = 0; // Reset position for reading - using var img = Pix.LoadFromMemory(imgStream.ToArray()); + // Load image from memory and process with Tesseract + using var img = Pix.LoadFromMemory(imgStream.ToArray()); + using var page = this._engine.Process(img); - using var page = this._engine.Process(img); - return page.GetText(); + return page.GetText(); // Return the extracted text + } + } + catch (OperationCanceledException) + { + // If operation is canceled, return an empty string or handle accordingly + return string.Empty; } } + + /// + /// Dispose the TesseractEngine resources. 
+ /// + public void Dispose() + { + // Dispose of the TesseractEngine to free up resources + _engine.Dispose(); + } } From 92471645d845b9ad459bdeda6bd750adb3ea3b48 Mon Sep 17 00:00:00 2001 From: Devis Lucato Date: Tue, 12 Nov 2024 17:09:19 -0800 Subject: [PATCH 3/3] Update shared/Ocr/Tesseract/TesseractOcrEngine.cs --- shared/Ocr/Tesseract/TesseractOcrEngine.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shared/Ocr/Tesseract/TesseractOcrEngine.cs b/shared/Ocr/Tesseract/TesseractOcrEngine.cs index 0084aa4a1..bca730239 100644 --- a/shared/Ocr/Tesseract/TesseractOcrEngine.cs +++ b/shared/Ocr/Tesseract/TesseractOcrEngine.cs @@ -33,7 +33,7 @@ public async Task ExtractTextFromImageAsync(Stream imageContent, Cancell // Use a buffer for CopyToAsync to reduce memory usage for large images await using (var imgStream = new MemoryStream()) { - await imageContent.CopyToAsync(imgStream, 81920, cancellationToken); // Buffered copy with 80 KB buffer size + await imageContent.CopyToAsync(imgStream, 81920, cancellationToken).ConfigureAwait(false); // Buffered copy with 80 KB buffer size imgStream.Position = 0; // Reset position for reading // Load image from memory and process with Tesseract