Skip to content

Commit

Permalink
Extract PDF on file system instead of memory
Browse files Browse the repository at this point in the history
  • Loading branch information
hishamco committed Nov 6, 2024
1 parent d5a4d64 commit 2dbbbfc
Showing 1 changed file with 13 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,15 @@ public async Task<string> GetTextAsync(string path, Stream fileStream)
// https://github.com/UglyToad/PdfPig/blob/master/src/UglyToad.PdfPig.Core/StreamInputBytes.cs#L45.
// Thus if it isn't, which is the case with e.g. Azure Blob Storage, we need to copy it to a new, seekable
// Stream.
MemoryStream seekableStream = null;
FileStream seekableStream = null;
try
{
if (!fileStream.CanSeek)
{
// Since fileStream.Length might not be supported either, we can't preconfigure the capacity of the
// MemoryStream.
seekableStream = new MemoryStream();
// While this involves loading the file into memory, we don't really have a choice.
seekableStream = CreateTemporaryFile();

await fileStream.CopyToAsync(seekableStream);

seekableStream.Position = 0;
}

Expand All @@ -39,7 +38,16 @@ public async Task<string> GetTextAsync(string path, Stream fileStream)
if (seekableStream != null)
{
await seekableStream.DisposeAsync();

File.Delete(seekableStream.Name);
}
}
}

/// <summary>
/// Creates a uniquely named temporary file and opens it as a readable, writable and seekable stream.
/// </summary>
/// <remarks>
/// The stream must be opened for both reading and writing: the caller first copies the source content into it
/// and then rewinds it so that it can be read back. The file is removed automatically once the stream is
/// closed; deleting it again afterwards by name is harmless because <see cref="File.Delete(string)"/> does
/// not throw for a file that no longer exists.
/// </remarks>
/// <returns>An open <see cref="FileStream"/> positioned at the start of an empty temporary file.</returns>
private static FileStream CreateTemporaryFile()
{
    // Path.GetTempFileName() already creates a zero-byte file and returns its full path, so combining it with
    // Path.GetTempPath() is unnecessary.
    var tempFilePath = Path.GetTempFileName();

    // 4096 is the default FileStream buffer size; it has to be spelled out to reach the FileOptions overload.
    return new FileStream(
        tempFilePath,
        FileMode.Create,
        FileAccess.ReadWrite,
        FileShare.None,
        4096,
        FileOptions.DeleteOnClose);
}
}

0 comments on commit 2dbbbfc

Please sign in to comment.