Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use lazy loading for object-streams and their objects #85

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,11 @@ public static void HandleUnexpectedCharacter(char ch)
"If you think this is a bug in PDFsharp, please send us your PDF file.", (int)ch);
ThrowParserException(message);
}
public static void HandleUnexpectedToken(string token)
public static void HandleUnexpectedToken(string token, int position)
{
string message = String.Format(CultureInfo.InvariantCulture,
"Unexpected token '{0}' in PDF stream. The file may be corrupted. " +
"If you think this is a bug in PDFsharp, please send us your PDF file.", token);
"Unexpected token '{0}' at position {1} in PDF stream. The file may be corrupted. " +
"If you think this is a bug in PDFsharp, please send us your PDF file.", token, position);
ThrowParserException(message);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ namespace PdfSharp.Pdf.Advanced
sealed class PdfCrossReferenceStream : PdfTrailer // Reference: 3.4.7 Cross-Reference Streams / Page 106
{
/// <summary>
/// Initializes a new instance of the <see cref="PdfObjectStream"/> class.
/// Initializes a new instance of the <see cref="PdfCrossReferenceStream"/> class.
/// </summary>
public PdfCrossReferenceStream(PdfDocument document)
: base(document)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,37 +49,6 @@ internal PdfObjectStream(PdfDictionary dict)
#endif
}

/// <summary>
/// Registers a reference for every compressed object listed in the header of this
/// object stream in the specified cross-reference table.
/// </summary>
/// <param name="xrefTable">
/// The cross-reference table that receives the references. Object IDs that the table
/// already contains are left untouched.
/// </param>
internal void ReadReferences(PdfCrossReferenceTable xrefTable)
{
    for (int idx = 0; idx < _header.Length; idx++)
    {
        // Each header row holds the object number first; the second entry (the offset)
        // is not needed to register the reference.
        int objectNumber = _header[idx][0];
        PdfObjectID objectID = new PdfObjectID(objectNumber);

        // HACK: -1 indicates compressed object.
        PdfReference iref = new PdfReference(objectID, -1);

        // Only add the reference when the table does not already know this object ID.
        if (!xrefTable.Contains(iref.ObjectID))
        {
            xrefTable.Add(iref);
        }
    }
}

/// <summary>
/// Reads the compressed object with the specified index.
/// </summary>
Expand Down Expand Up @@ -108,7 +77,7 @@ public class Keys : PdfStream.Keys

/// <summary>
/// (Required) The type of PDF object that this dictionary describes;
/// must be ObjStmfor an object stream.
/// must be ObjStm for an object stream.
/// </summary>
[KeyInfo(KeyType.Name | KeyType.Required, FixedValue = "ObjStm")]
public const string Type = "/Type";
Expand All @@ -130,7 +99,7 @@ public class Keys : PdfStream.Keys
/// (Optional) A reference to an object stream, of which the current object
/// stream is considered an extension. Both streams are considered part of
/// a collection of object streams (see below). A given collection consists
/// of a set of streams whose Extendslinks form a directed acyclic graph.
/// of a set of streams whose Extends links form a directed acyclic graph.
/// </summary>
[KeyInfo(KeyType.Stream | KeyType.Optional)]
public const string Extends = "/Extends";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ namespace PdfSharp.Pdf.Advanced
/// Represents an indirect reference to a PdfObject.
/// </summary>
[DebuggerDisplay("iref({ObjectNumber}, {GenerationNumber})")]
public sealed class PdfReference : PdfItem
public class PdfReference : PdfItem
{
// About PdfReference
//
Expand Down Expand Up @@ -154,7 +154,7 @@ public int Position
/// <summary>
/// Gets or sets the referenced PdfObject.
/// </summary>
public PdfObject Value
public virtual PdfObject Value
{
get => _value;
set
Expand Down Expand Up @@ -246,4 +246,93 @@ public int Compare(PdfReference? l, PdfReference? r)
int _uid;
#endif
}

/// <summary>
/// Represents an indirect reference to an object stored in a <see cref="PdfObjectStream"/>.
/// The value of this object is "lazily" loaded when first accessed.
/// </summary>
public sealed class PdfReferenceToCompressedObject : PdfReference
{
    // Object number of the object stream that contains the referenced object.
    private readonly int _objectStreamNumber;

    // Index passed to PdfObjectStream.ReadCompressedObject to locate the referenced object.
    private readonly int _indexInObjectStream;

    /// <summary>
    /// Initializes a new instance of the <see cref="PdfReferenceToCompressedObject"/> class.
    /// </summary>
    /// <param name="doc">The owning document; used to look up and parse the object stream.</param>
    /// <param name="objectID">The ID of the compressed object this reference points to.</param>
    /// <param name="objectStreamNumber">The object number of the containing object stream.</param>
    /// <param name="indexInObjectStream">The index of the object within the object stream.</param>
    /// <exception cref="ArgumentNullException"><paramref name="doc"/> is null.</exception>
    internal PdfReferenceToCompressedObject(PdfDocument doc, PdfObjectID objectID,
        int objectStreamNumber, int indexInObjectStream)
        : base(objectID, -1) // -1 is the position used for compressed objects.
    {
        Document = doc ?? throw new ArgumentNullException(nameof(doc));
        _objectStreamNumber = objectStreamNumber;
        _indexInObjectStream = indexInObjectStream;
    }

    /// <summary>
    /// Gets or sets the referenced PdfObject. On first read access the object is loaded
    /// from its object stream. If the object stream or the object cannot be resolved,
    /// the stored value stays unset and the getter returns it as is.
    /// </summary>
    public override PdfObject Value
    {
        get
        {
            if (base.Value is null)
            {
                ReadValue();
            }
            return base.Value!;
        }
        set => base.Value = value;
    }

    /// <summary>
    /// Reads the value of this object from its containing object stream, loading,
    /// decrypting and caching the object stream first if that has not happened yet.
    /// </summary>
    void ReadValue()
    {
        var stmObjID = new PdfObjectID(_objectStreamNumber);

        // Reference to the object stream; nothing can be loaded without it.
        var streamRef = Document.IrefTable[stmObjID];
        if (streamRef is null)
        {
            return;
        }

        PdfObjectStream? ostm = null;
        if (streamRef.Value is null)
        {
            // Object stream not yet loaded. Do it now.
            var parser = Document.GetParser()!;
            var state = parser.SaveState();
            try
            {
                var obj = parser.ReadObject(null, stmObjID, false, false);
                if (obj is PdfDictionary ostmDict)
                {
                    // Decrypt if necessary.
                    // Must be done before type-transformation because PdfObjectStream
                    // tries to parse the stream-header in the constructor.
                    Document.EffectiveSecurityHandler?.DecryptObject(ostmDict);
                    ostm = new PdfObjectStream(ostmDict);
                }
            }
            finally
            {
                // Always put the parser back where it was, even if reading, decrypting
                // or transforming the object stream throws.
                parser.RestoreState(state);
            }
            Debug.Assert(ostm != null, "Object stream should not be null here");
        }
        else if (streamRef.Value is PdfObjectStream existingOstm)
        {
            // Already transformed.
            ostm = existingOstm;
        }
        else
        {
            if (streamRef.Value is PdfDictionary ostmDict)
            {
                // Loaded as a plain dictionary, but not yet transformed. Decrypt if necessary.
                Document.EffectiveSecurityHandler?.DecryptObject(ostmDict);
                ostm = new PdfObjectStream(ostmDict);
            }
            Debug.Assert(ostm != null, "Object stream should not be null here");
        }

        if (ostm is null)
        {
            return;
        }

        // Store the loaded and decrypted object stream.
        streamRef.Value = ostm;

        // Read the actual object we're looking for.
        var iref = ostm.ReadCompressedObject(_indexInObjectStream);
        if (iref is not null)
        {
            Debug.Assert(iref.ObjectID == ObjectID, "ObjectID mismatch");
            base.Value = iref.Value;
        }
    }
}
}
4 changes: 2 additions & 2 deletions src/foundation/src/PDFsharp/src/PdfSharp/Pdf.IO/Lexer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -937,8 +937,8 @@ public double TokenToReal
{
get
{
// ReSharper disable once CompareOfFloatsByEqualityOperator
Debug.Assert(_tokenAsReal == double.Parse(_token.ToString(), CultureInfo.InvariantCulture));
// Had several documents where the assertion failed with an equality comparison (==), so compare with a tolerance instead.
Debug.Assert(Math.Abs(_tokenAsReal - double.Parse(_token.ToString(), CultureInfo.InvariantCulture)) < 0.000000001);
return _tokenAsReal;
}
}
Expand Down
Loading