Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allow endstream position to be off by one #286

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 36 additions & 9 deletions lib/object/object.js
Original file line number Diff line number Diff line change
Expand Up @@ -103,18 +103,17 @@ class PDFObject {
lexer.pos = pos
}

const PDFStream = require('./stream') // lazy load, cause circular referecnes
const PDFStream = require('./stream') // lazy load, cause circular references
const stream = new PDFStream(obj)
stream.content = lexer.read(length)
lexer.skipEOL(1, true)

// not to be expected according to the PDF spec, but there are some PDF files that indent
// the stream
lexer.skipWhitespace(null, true)

if (lexer.readString(9) !== 'endstream') {
throw new Error('Invalid stream: `endstream` not found')
}
const orphan = parseEndStream(lexer)
if (orphan) {
const merged = new Uint8Array(stream.content.length + orphan.length);
merged.set(stream.content);
merged.set(orphan, stream.content.length);
stream.content = merged
}

lexer.skipEOL(1, true)
}
Expand All @@ -126,4 +125,32 @@ class PDFObject {
}
}

/**
 * Consume the `endstream` keyword that closes a stream object, tolerating the
 * different ways PDF producers lay it out.
 *
 * The lexer is advanced past an optional end-of-line marker and any leading
 * whitespace before the keyword is read. If the keyword turns out to be
 * preceded by exactly one stray character (a stream length that is off by
 * one, see #285), that character is handed back so the caller can append it
 * to the stream content.
 *
 * @param {object} lexer - reader exposing `skipEOL`, `skipWhitespace` and
 *   `readString`; it is advanced as a side effect of this call
 * @returns {Uint8Array|null} a one-element array holding the char code of the
 *   stray character, or `null` when `endstream` was found right away
 * @throws {Error} when no `endstream` keyword could be matched
 */
function parseEndStream(lexer) {
  const KEYWORD = 'endstream'

  // Producers disagree on whether `endstream` sits at the end of the last
  // data line or on a line of its own, so an EOL here is skipped if present.
  lexer.skipEOL(1, true)

  // The PDF spec does not foresee it, but some files indent the keyword.
  lexer.skipWhitespace(null, true)

  const candidate = lexer.readString(KEYWORD.length)
  if (candidate === KEYWORD) {
    return null
  }

  // Off-by-one tolerance (#285): the nine characters just read are one stray
  // character followed by `endstrea`, and the very next character is the
  // missing `m`. The extra read only happens when the first check matched.
  const shiftedByOne =
    candidate.endsWith(KEYWORD.slice(0, -1)) &&
    lexer.readString(1) === KEYWORD.slice(-1)

  if (!shiftedByOne) {
    throw new Error('Invalid stream: `endstream` not found')
  }

  return Uint8Array.of(candidate.charCodeAt(0))
}

module.exports = PDFObject