From ea15ab290c48c162fa2835ff24e493e8e6372e6d Mon Sep 17 00:00:00 2001
From: Markus Ast
Date: Mon, 5 Sep 2022 17:59:11 +0200
Subject: [PATCH] allow endstream position to be off by one

---
Review notes (this section is ignored by `git am`):
- line structure and indentation of this patch were reconstructed; verify the
  context lines against lib/object/object.js before applying
- added lines now follow the file's style (single quotes, no semicolons)
- the post-image blob id on the `index` line was not recomputed

 lib/object/object.js | 51 +++++++++++++++++++++++++++++++++++++--------
 1 file changed, 42 insertions(+), 9 deletions(-)

diff --git a/lib/object/object.js b/lib/object/object.js
index bd9ffcb..c2f2f43 100644
--- a/lib/object/object.js
+++ b/lib/object/object.js
@@ -103,18 +103,18 @@ class PDFObject {
         lexer.pos = pos
       }
 
-      const PDFStream = require('./stream') // lazy load, cause circular referecnes
+      const PDFStream = require('./stream') // lazy load, cause circular references
       const stream = new PDFStream(obj)
       stream.content = lexer.read(length)
-      lexer.skipEOL(1, true)
-
-      // not to be expected according to the PDF spec, but there are some PDF files that indent
-      // the stream
-      lexer.skipWhitespace(null, true)
 
-      if (lexer.readString(9) !== 'endstream') {
-        throw new Error('Invalid stream: `endstream` not found')
-      }
+      const orphan = parseEndStream(lexer)
+      if (orphan) {
+        // the declared length was one byte short -> append the stray byte to the content
+        const merged = new Uint8Array(stream.content.length + orphan.length)
+        merged.set(stream.content)
+        merged.set(orphan, stream.content.length)
+        stream.content = merged
+      }
 
       lexer.skipEOL(1, true)
     }
@@ -126,4 +126,37 @@ class PDFObject {
   }
 }
 
+/**
+ * Parse an `endstream` keyword while tolerating the different ways producers have interpreted
+ * the spec. Any stream byte left over before the `endstream` is returned to the caller.
+ *
+ * @param {object} lexer - lexer positioned directly after the stream content; must provide
+ *   `skipEOL`, `skipWhitespace` and `readString` (as called below)
+ * @returns {Uint8Array|null} the single byte found in front of `endstream` when the declared
+ *   length was one too short, otherwise `null`
+ * @throws {Error} if `endstream` is not found at the expected position or one byte after it
+ */
+function parseEndStream(lexer) {
+  // some producers put the `endstream` at the end of the last line, and some on the next line
+  // -> accept both
+  lexer.skipEOL(1, true)
+
+  // not to be expected according to the PDF spec, but there are some PDF files that indent
+  // the stream
+  lexer.skipWhitespace(null, true)
+
+  const end = lexer.readString(9)
+  if (end === 'endstream') {
+    return null
+  }
+
+  // allow the length to be off by one (#285): the 9 characters read are then one stray
+  // content byte followed by `endstrea`, and the next character has to be the missing `m`
+  if (end.endsWith('endstrea') && lexer.readString(1) === 'm') {
+    return Uint8Array.of(end.charCodeAt(0))
+  }
+
+  throw new Error('Invalid stream: `endstream` not found')
+}
+
 module.exports = PDFObject