Skip to content

Commit

Permalink
Use PdfDictionary to try and improve parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
vbuch committed Dec 1, 2023
1 parent cbf7e17 commit 6c4bb13
Show file tree
Hide file tree
Showing 8 changed files with 140 additions and 213 deletions.
130 changes: 61 additions & 69 deletions packages/placeholder-plain/src/__snapshots__/readRefTable.test.js.snap
Original file line number Diff line number Diff line change
@@ -1,70 +1,6 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP

exports[`getXref Throws an error when size has unexpected value 1`] = `"Failed to parse size of xref table."`;

exports[`getXref Throws an error when size is not found 1`] = `"Size not found in xref table."`;

exports[`getXref Throws an error when xref is not at its expected position 1`] = `"Cross-Reference Streams not yet implemented."`;

exports[`getXref Throws an error when xref is not found at position 1`] = `"Cross-Reference Streams not yet implemented."`;

exports[`readRefTable Expects to merge correctly the refTable of resources 1`] = `
Object {
"maxIndex": 19,
"offsets": Map {
1 => 19012,
2 => 19,
3 => 224,
4 => 12330,
5 => 244,
6 => 11154,
7 => 11176,
8 => 11368,
9 => 11709,
10 => 11910,
11 => 11943,
12 => 12140,
13 => 12196,
14 => 18928,
15 => 12494,
17 => 13264,
18 => 18742,
19 => 18860,
},
"startingIndex": 0,
}
`;

exports[`readRefTable Expects to merge correctly the refTable of resources 2`] = `
Object {
"maxIndex": 21,
"offsets": Map {
1 => 25091,
2 => 19,
3 => 224,
4 => 12330,
5 => 244,
6 => 11154,
7 => 11176,
8 => 11368,
9 => 11709,
10 => 11910,
11 => 11943,
12 => 12140,
13 => 12196,
14 => 18948,
15 => 12494,
17 => 13264,
18 => 18742,
19 => 25016,
20 => 19431,
21 => 24878,
},
"startingIndex": 0,
}
`;

exports[`readRefTable Expects to merge correctly the refTable of resources 3`] = `
exports[`readRefTable Expects to merge correctly the refTable of contributing.pdf: contributing.pdf 1`] = `
Object {
"maxIndex": 24,
"offsets": Map {
Expand Down Expand Up @@ -97,7 +33,7 @@ Object {
}
`;

exports[`readRefTable Expects to merge correctly the refTable of resources 4`] = `
exports[`readRefTable Expects to merge correctly the refTable of formexample.pdf: formexample.pdf 1`] = `
Object {
"maxIndex": 62,
"offsets": Map {
Expand Down Expand Up @@ -168,7 +104,7 @@ Object {
}
`;

exports[`readRefTable Expects to merge correctly the refTable of resources 5`] = `
exports[`readRefTable Expects to merge correctly the refTable of incrementally_signed.pdf: incrementally_signed.pdf 1`] = `
Object {
"maxIndex": 18,
"offsets": Map {
Expand All @@ -195,7 +131,7 @@ Object {
}
`;

exports[`readRefTable Expects to merge correctly the refTable of resources 6`] = `
exports[`readRefTable Expects to merge correctly the refTable of signed.pdf: signed.pdf 1`] = `
Object {
"maxIndex": 13,
"offsets": Map {
Expand All @@ -217,7 +153,63 @@ Object {
}
`;

exports[`readRefTable Expects to merge correctly the refTable of resources 7`] = `
exports[`readRefTable Expects to merge correctly the refTable of signed-once.pdf: signed-once.pdf 1`] = `
Object {
"maxIndex": 19,
"offsets": Map {
1 => 19012,
2 => 19,
3 => 224,
4 => 12330,
5 => 244,
6 => 11154,
7 => 11176,
8 => 11368,
9 => 11709,
10 => 11910,
11 => 11943,
12 => 12140,
13 => 12196,
14 => 18928,
15 => 12494,
17 => 13264,
18 => 18742,
19 => 18860,
},
"startingIndex": 0,
}
`;

exports[`readRefTable Expects to merge correctly the refTable of signed-twice.pdf: signed-twice.pdf 1`] = `
Object {
"maxIndex": 21,
"offsets": Map {
1 => 25091,
2 => 19,
3 => 224,
4 => 12330,
5 => 244,
6 => 11154,
7 => 11176,
8 => 11368,
9 => 11709,
10 => 11910,
11 => 11943,
12 => 12140,
13 => 12196,
14 => 18948,
15 => 12494,
17 => 13264,
18 => 18742,
19 => 25016,
20 => 19431,
21 => 24878,
},
"startingIndex": 0,
}
`;

exports[`readRefTable Expects to merge correctly the refTable of w3dummy.pdf: w3dummy.pdf 1`] = `
Object {
"maxIndex": 15,
"offsets": Map {
Expand Down
9 changes: 5 additions & 4 deletions packages/placeholder-plain/src/findObject.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import getIndexFromRef from './getIndexFromRef';
import PdfDictionary from './PdfDictionary';

/**
* @typedef {object} FindObjectAtReturnType
Expand All @@ -13,20 +14,20 @@ import getIndexFromRef from './getIndexFromRef';
*/
export const findObjectAt = (pdf, position) => {
let slice = pdf.subarray(position);
slice = slice.subarray(0, slice.indexOf('endobj', 'utf8') - 1);
// ^ Buffer from the start position until the first endobj (included).
slice = slice.subarray(slice.indexOf('obj') + 3, slice.indexOf('endobj', 'utf8') - 1);
// ^ Buffer from the start position until the first endobj.

const dictionary = slice.subarray(
slice.indexOf('<<', 'utf8') + 2,
slice.indexOf('>>', 'utf8') - 1,
slice.lastIndexOf('>>', 'utf8'),
);
const stream = slice.subarray(
slice.indexOf('stream', 'utf8') + 6,
slice.indexOf('endstream', 'utf8') - 1,
);

return {
dictionary,
dictionary: new PdfDictionary(dictionary),
stream,
};
};
Expand Down
31 changes: 0 additions & 31 deletions packages/placeholder-plain/src/getValue.js

This file was deleted.

22 changes: 0 additions & 22 deletions packages/placeholder-plain/src/getValue.test.js

This file was deleted.

39 changes: 23 additions & 16 deletions packages/placeholder-plain/src/readPdf.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import readRefTable from './readRefTable';
import findObject from './findObject';
import {getValue} from './getValue';
import {SignPdfError} from '@signpdf/utils';
import readRefTable, {getLastXrefPosition} from './readRefTable';
import findObject, {findObjectAt} from './findObject';

/**
* @typedef {object} ReadPdfReturnType
Expand All @@ -23,26 +23,33 @@ import {getValue} from './getValue';
*/
const readPdf = (pdfBuffer) => {
// Extract the trailer dictionary.
const trailerStart = pdfBuffer.lastIndexOf('trailer');
// The trailer is followed by xref. Then an EOF. EOF's length is 6 characters.
const trailer = pdfBuffer.slice(trailerStart, pdfBuffer.length - 6);

let xRefPosition = trailer.slice(trailer.lastIndexOf('startxref') + 10).toString();

xRefPosition = parseInt(xRefPosition);
const refTable = readRefTable(pdfBuffer);

const rootRef = getValue(trailer, '/Root');
const xRefPosition = getLastXrefPosition(pdfBuffer);

let refTable;

const trailerObject = findObjectAt(pdfBuffer, xRefPosition);
if (trailerObject.stream.indexOf('trailer') !== -1) {
// assuming trailer
refTable = readRefTable(pdfBuffer, xRefPosition);
} else {
// assuming stream
if (!trailerObject.dictionary.has('/Filter')) {
throw new Error('Expected /Filter in trailer with streams.');
}
throw new SignPdfError(
'/Filter is not implemented.',
SignPdfError.TYPE_PARSE,
);
}
const rootRef = trailerObject.dictionary.get('/Root');
const root = findObject(pdfBuffer, refTable, rootRef).toString();

const infoRef = getValue(trailer, '/Info');
const infoRef = trailerObject.dictionary.get('/Info');

return {
xref: refTable,
rootRef,
root,
infoRef,
trailerStart,
xRefPosition,
};
};
Expand Down
36 changes: 23 additions & 13 deletions packages/placeholder-plain/src/readPdf.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,30 @@ import {readTestResource} from '@signpdf/internal-utils';
import readPdf from './readPdf';

describe(readPdf, () => {
it('reads contributing.pdf', () => {
const pdfBuffer = readTestResource('contributing.pdf');
it.each([
{
resource: 'signed-once.pdf',
xRefPosition: 19174,
root: 14,
rootByteOffset: 18928,
info: 15,
},
{
resource: 'contributing.pdf',
xRefPosition: 72203,
root: 12,
rootByteOffset: 4394,
info: 1,
},
])('reads $resource', ({
resource, root, info, xRefPosition, rootByteOffset,
}) => {
const pdfBuffer = readTestResource(resource);
const result = readPdf(pdfBuffer);

expect(result.xRefPosition).toBe(72203);
expect(result.rootRef).toBe('12 0 R');
expect(result.infoRef).toBe('1 0 R');
});
it('reads issue-79-test.pdf', () => {
const pdfBuffer = readTestResource('issue-79-test.pdf');
const result = readPdf(pdfBuffer);

expect(result.xRefPosition).toBe(1542);
expect(result.rootRef).toBe('2 0 R');
expect(result.infoRef).toBe('3 0 R');
expect(result.xRefPosition).toBe(xRefPosition);
expect(result.rootRef).toBe(`${root} 0 R`);
expect(result.infoRef).toBe(`${info} 0 R`);
expect(result.xref.offsets.get(root)).toBe(rootByteOffset);
});
});
Loading

0 comments on commit 6c4bb13

Please sign in to comment.