From 2dbbbfc6f82822107927b4b703edda2f3f7c86f7 Mon Sep 17 00:00:00 2001
From: Hisham Bin Ateya
Date: Wed, 6 Nov 2024 03:03:21 +0300
Subject: [PATCH] Extract PDF on file system instead of memory

---
Review notes:
 * CreateTemporaryFile opens the temporary file with FileAccess.ReadWrite.
   The stream is filled through CopyToAsync and then rewound (Position = 0)
   so it can be read back; a FileStream opened with FileAccess.Write reports
   CanRead == false and cannot be read.
 * The temporary path is built with Path.GetRandomFileName().
   Path.GetTempFileName() already returns a full path and creates the file on
   disk, which turned the surrounding Path.Combine call into a no-op.

 .../Services/PdfMediaFileTextProvider.cs | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/OrchardCore.Modules/OrchardCore.Media.Indexing.Pdf/Services/PdfMediaFileTextProvider.cs b/src/OrchardCore.Modules/OrchardCore.Media.Indexing.Pdf/Services/PdfMediaFileTextProvider.cs
index 11f1bd98dac..1c5db024a31 100644
--- a/src/OrchardCore.Modules/OrchardCore.Media.Indexing.Pdf/Services/PdfMediaFileTextProvider.cs
+++ b/src/OrchardCore.Modules/OrchardCore.Media.Indexing.Pdf/Services/PdfMediaFileTextProvider.cs
@@ -11,16 +11,15 @@ public async Task GetTextAsync(string path, Stream fileStream)
         // https://github.com/UglyToad/PdfPig/blob/master/src/UglyToad.PdfPig.Core/StreamInputBytes.cs#L45.
         // Thus if it isn't, which is the case with e.g. Azure Blob Storage, we need to copy it to a new, seekable
         // Stream.
-        MemoryStream seekableStream = null;
+        FileStream seekableStream = null;
         try
         {
             if (!fileStream.CanSeek)
             {
-                // Since fileStream.Length might not be supported either, we can't preconfigure the capacity of the
-                // MemoryStream.
-                seekableStream = new MemoryStream();
-                // While this involves loading the file into memory, we don't really have a choice.
+                seekableStream = CreateTemporaryFile();
+
                 await fileStream.CopyToAsync(seekableStream);
+
                 seekableStream.Position = 0;
             }
 
@@ -39,7 +38,17 @@ public async Task GetTextAsync(string path, Stream fileStream)
             if (seekableStream != null)
             {
                 await seekableStream.DisposeAsync();
+
+                File.Delete(seekableStream.Name);
             }
         }
     }
+
+    private static FileStream CreateTemporaryFile()
+    {
+        var tempFilePath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());
+
+        // ReadWrite (not Write): the stream is filled by CopyToAsync, then rewound and read back.
+        return new FileStream(tempFilePath, FileMode.Create, FileAccess.ReadWrite);
+    }
 }