ganze project
This commit is contained in:
96
node_modules/pdf-parse/dist/node/cjs/index.cjs
generated
vendored
Normal file
96
node_modules/pdf-parse/dist/node/cjs/index.cjs
generated
vendored
Normal file
@@ -0,0 +1,96 @@
|
||||
"use strict";
|
||||
// Cached references to the Object built-ins used by the module-interop helpers below.
var __create = Object.create;
|
||||
var __defProp = Object.defineProperty;
|
||||
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
||||
var __getOwnPropNames = Object.getOwnPropertyNames;
|
||||
var __getProtoOf = Object.getPrototypeOf;
|
||||
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
||||
// Defines every key of `all` on `target` as an enumerable getter property.
var __export = (target, all) => {
|
||||
for (var name in all)
|
||||
__defProp(target, name, { get: all[name], enumerable: true });
|
||||
};
|
||||
// Copies the own property names of `from` onto `to` as getters, skipping keys that `to`
// already owns and the key named by `except`; enumerability follows the source descriptor.
// Returns `to`.
var __copyProps = (to, from, except, desc) => {
|
||||
if (from && typeof from === "object" || typeof from === "function") {
|
||||
for (let key of __getOwnPropNames(from))
|
||||
if (!__hasOwnProp.call(to, key) && key !== except)
|
||||
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
||||
}
|
||||
return to;
|
||||
};
|
||||
// Wraps a required module: creates `target` from the module's prototype (or a plain
// object when `mod` is null/undefined), optionally adds a `default` property pointing at
// `mod`, then copies the module's own properties onto it.
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
||||
// If the importer is in node compatibility mode or this is not an ESM
|
||||
// file that has been converted to a CommonJS file using a Babel-
|
||||
// compatible transform (i.e. "__esModule" has not been set), then set
|
||||
// "default" to the CommonJS "module.exports" for node compatibility.
|
||||
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
||||
mod
|
||||
));
|
||||
// Returns a new object flagged with `__esModule: true` that carries getters for every
// own property of `mod`.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
||||
|
||||
// src/node/index.ts
|
||||
var index_exports = {};
|
||||
__export(index_exports, {
|
||||
getHeader: () => getHeader
|
||||
});
|
||||
module.exports = __toCommonJS(index_exports);
|
||||
|
||||
// src/node/getHeader.ts
|
||||
var http = __toESM(require("node:http"), 1);
|
||||
var https = __toESM(require("node:https"), 1);
|
||||
/**
 * Send one body-less HTTP(S) request and collect the response.
 *
 * @param u - Target URL; the `https:` protocol selects `https.request`, anything else `http.request`.
 * @param method - HTTP method, e.g. "HEAD" or "GET".
 * @param headers - Optional request headers.
 * @param maxBytes - Stop reading once at least this many body bytes have arrived and tear the
 *   connection down. Defaults to `Infinity` (read the whole body).
 * @returns Resolves with `{ status, headers, buffer }`. Multi-valued response headers are joined
 *   with ","; `buffer` is `undefined` when no body bytes were received and may hold more than
 *   `maxBytes` because whole chunks are kept. Rejects on request errors and when the response
 *   stream fails or closes before the message is complete.
 */
async function nodeRequest(u, method, headers, maxBytes = Infinity) {
  return new Promise((resolve, reject) => {
    const reqFn = u.protocol === "https:" ? https.request : http.request;
    const req = reqFn(u, { method, headers }, (res) => {
      const headersObj = {};
      for (const [k, v] of Object.entries(res.headers)) {
        headersObj[k] = Array.isArray(v) ? v.join(",") : v ?? "";
      }
      const chunks = [];
      let received = 0;
      let settled = false;
      const succeed = () => {
        if (settled) return;
        settled = true;
        const buffer = chunks.length ? Buffer.concat(chunks) : void 0;
        resolve({ status: res.statusCode ?? 0, headers: headersObj, buffer });
      };
      const fail = (err) => {
        if (settled) return;
        settled = true;
        reject(err);
      };
      res.on("data", (c) => {
        if (settled) return;
        const chunk = Buffer.from(c);
        chunks.push(chunk);
        received += chunk.length;
        if (received >= maxBytes) {
          // Enough data for the caller: settle first, then drop the connection so the
          // rest of the body is never downloaded.
          succeed();
          req.destroy();
        }
      });
      res.on("end", succeed);
      // Without these listeners a response that is cut off mid-body never emits "end"
      // and the promise would stay pending forever.
      res.on("error", fail);
      res.on("close", () => {
        if (res.complete) {
          succeed();
        } else {
          fail(new Error("Response closed before the body was complete"));
        }
      });
    });
    req.on("error", (err) => reject(err));
    req.end();
  });
}

/**
 * Perform an HTTP HEAD request to retrieve the file size and verify existence;
 * when `check` is true, additionally fetch the first bytes with a ranged GET and
 * compare them with the "%PDF" signature.
 *
 * @param url - The URL of the PDF file to check. Can be a string or URL object.
 * @param check - When `true`, request the first 4 bytes and validate the signature.
 *   The download is capped, so a server that ignores `Range` is not read to the end.
 *   Default: `false`.
 * @returns An object with `ok` (HEAD status is 2xx), `status`, `size` (parsed
 *   `Content-Length`, `undefined` when absent or unparsable), `magic` (`null` when `check`
 *   is false, otherwise whether the signature matched; `false` when the GET is not 2xx or
 *   has no body) and the HEAD response `headers`. Never throws: any failure is reported as
 *   `{ ok: false, magic: false, headers: {}, error }`.
 */
async function getHeader(url, check = false) {
  try {
    const u = typeof url === "string" ? new URL(url) : url;
    const headResp = await nodeRequest(u, "HEAD");
    const rawLength = headResp.headers["content-length"];
    const parsedLength = rawLength ? parseInt(rawLength, 10) : NaN;
    // Report an absent or unparsable Content-Length as "unknown" instead of NaN.
    const size = Number.isNaN(parsedLength) ? void 0 : parsedLength;
    let magic = null;
    if (check) {
      // "bytes=0-3" is inclusive, i.e. exactly the four signature bytes.
      const rangeResp = await nodeRequest(u, "GET", { Range: "bytes=0-3" }, 4);
      const rangeOk = rangeResp.status >= 200 && rangeResp.status < 300;
      magic = rangeOk && rangeResp.buffer !== void 0
        ? rangeResp.buffer.subarray(0, 4).toString("latin1") === "%PDF"
        : false;
    }
    return {
      ok: headResp.status >= 200 && headResp.status < 300,
      status: headResp.status,
      size,
      magic,
      headers: headResp.headers
    };
  } catch (error) {
    return {
      ok: false,
      status: void 0,
      size: void 0,
      magic: false,
      headers: {},
      error: new Error(String(error))
    };
  }
}
|
||||
// Annotate the CommonJS export names for ESM import in node:
|
||||
0 && (module.exports = {
|
||||
getHeader
|
||||
});
|
||||
//# sourceMappingURL=index.cjs.map
|
||||
7
node_modules/pdf-parse/dist/node/cjs/index.cjs.map
generated
vendored
Normal file
7
node_modules/pdf-parse/dist/node/cjs/index.cjs.map
generated
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"version": 3,
|
||||
"sources": ["../../../src/node/index.ts", "../../../src/node/getHeader.ts"],
|
||||
"sourcesContent": ["export type * from './getHeader.js';\nexport * from './getHeader.js';\n", "import * as http from 'node:http';\nimport * as https from 'node:https';\n\n/**\n * Result information from getHeader.\n * @public\n */\nexport interface HeaderResult {\n\tok: boolean;\n\tstatus?: number;\n\tsize?: number;\n\tmagic: boolean | null;\n\theaders?: Record<string, string>;\n\terror?: Error;\n}\n\ninterface RequestResult {\n\tstatus: number;\n\theaders: Record<string, string>;\n\tbuffer?: Buffer;\n}\n\nasync function nodeRequest(u: URL, method: string, headers?: Record<string, string>): Promise<RequestResult> {\n\treturn new Promise((resolve, reject) => {\n\t\tconst reqFn = u.protocol === 'https:' ? https.request : http.request;\n\t\tconst req = reqFn(u, { method, headers }, (res) => {\n\t\t\tconst headersObj: Record<string, string> = {};\n\t\t\tfor (const [k, v] of Object.entries(res.headers)) {\n\t\t\t\theadersObj[k] = Array.isArray(v) ? v.join(',') : (v ?? '');\n\t\t\t}\n\n\t\t\tconst chunks: Buffer[] = [];\n\t\t\tres.on('data', (c) => chunks.push(Buffer.from(c)));\n\t\t\tres.on('end', () => {\n\t\t\t\tconst buffer = chunks.length ? Buffer.concat(chunks) : undefined;\n\t\t\t\tresolve({ status: res.statusCode ?? 0, headers: headersObj, buffer });\n\t\t\t});\n\t\t});\n\n\t\treq.on('error', (err) => reject(err));\n\t\treq.end();\n\t});\n}\n\n/**\n * Perform an HTTP HEAD request to retrieve the file size and verify existence;\n * when `check` is true, fetch a small range and inspect the magic number to confirm the URL points to a valid PDF.\n * If the server does not support range requests, `isPdf` will be set to `false`.\n * @param url - The URL of the PDF file to check. Can be a string or URL object.\n * @param check - When `true`, download a small byte range (first 4 bytes) to validate the file signature by checking for '%PDF' magic bytes. 
Default: `false`.\n * @returns - A Promise that resolves to a HeaderResult object containing the response status, size, headers, and PDF validation result.\n * @public\n */\nexport async function getHeader(url: string | URL, check: boolean = false): Promise<HeaderResult> {\n\ttry {\n\t\tconst u = typeof url === 'string' ? new URL(url) : url;\n\n\t\tconst headResp = await nodeRequest(u, 'HEAD');\n\t\tconst size = headResp.headers['content-length'] ? parseInt(headResp.headers['content-length'], 10) : undefined;\n\n\t\tlet magic: boolean | null = null;\n\t\tif (check) {\n\t\t\tconst rangeResp = await nodeRequest(u, 'GET', { Range: 'bytes=0-4' });\n\t\t\tif (rangeResp.status >= 200 && rangeResp.status < 300 && rangeResp.buffer) {\n\t\t\t\tconst headerStr = rangeResp.buffer.slice(0, 4).toString('utf8');\n\t\t\t\tmagic = headerStr.startsWith('%PDF');\n\t\t\t} else {\n\t\t\t\tmagic = false;\n\t\t\t}\n\t\t}\n\n\t\treturn {\n\t\t\tok: headResp.status >= 200 && headResp.status < 300,\n\t\t\tstatus: headResp.status,\n\t\t\tsize,\n\t\t\tmagic,\n\t\t\theaders: headResp.headers,\n\t\t};\n\t} catch (error) {\n\t\treturn {\n\t\t\tok: false,\n\t\t\tstatus: undefined,\n\t\t\tsize: undefined,\n\t\t\tmagic: false,\n\t\t\theaders: {},\n\t\t\terror: new Error(String(error)),\n\t\t};\n\t}\n}\n"],
|
||||
"mappings": ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,WAAsB;AACtB,YAAuB;AAqBvB,eAAe,YAAY,GAAQ,QAAgB,SAA0D;AAC5G,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACvC,UAAM,QAAQ,EAAE,aAAa,WAAiB,gBAAe;AAC7D,UAAM,MAAM,MAAM,GAAG,EAAE,QAAQ,QAAQ,GAAG,CAAC,QAAQ;AAClD,YAAM,aAAqC,CAAC;AAC5C,iBAAW,CAAC,GAAG,CAAC,KAAK,OAAO,QAAQ,IAAI,OAAO,GAAG;AACjD,mBAAW,CAAC,IAAI,MAAM,QAAQ,CAAC,IAAI,EAAE,KAAK,GAAG,IAAK,KAAK;AAAA,MACxD;AAEA,YAAM,SAAmB,CAAC;AAC1B,UAAI,GAAG,QAAQ,CAAC,MAAM,OAAO,KAAK,OAAO,KAAK,CAAC,CAAC,CAAC;AACjD,UAAI,GAAG,OAAO,MAAM;AACnB,cAAM,SAAS,OAAO,SAAS,OAAO,OAAO,MAAM,IAAI;AACvD,gBAAQ,EAAE,QAAQ,IAAI,cAAc,GAAG,SAAS,YAAY,OAAO,CAAC;AAAA,MACrE,CAAC;AAAA,IACF,CAAC;AAED,QAAI,GAAG,SAAS,CAAC,QAAQ,OAAO,GAAG,CAAC;AACpC,QAAI,IAAI;AAAA,EACT,CAAC;AACF;AAWA,eAAsB,UAAU,KAAmB,QAAiB,OAA8B;AACjG,MAAI;AACH,UAAM,IAAI,OAAO,QAAQ,WAAW,IAAI,IAAI,GAAG,IAAI;AAEnD,UAAM,WAAW,MAAM,YAAY,GAAG,MAAM;AAC5C,UAAM,OAAO,SAAS,QAAQ,gBAAgB,IAAI,SAAS,SAAS,QAAQ,gBAAgB,GAAG,EAAE,IAAI;AAErG,QAAI,QAAwB;AAC5B,QAAI,OAAO;AACV,YAAM,YAAY,MAAM,YAAY,GAAG,OAAO,EAAE,OAAO,YAAY,CAAC;AACpE,UAAI,UAAU,UAAU,OAAO,UAAU,SAAS,OAAO,UAAU,QAAQ;AAC1E,cAAM,YAAY,UAAU,OAAO,MAAM,GAAG,CAAC,EAAE,SAAS,MAAM;AAC9D,gBAAQ,UAAU,WAAW,MAAM;AAAA,MACpC,OAAO;AACN,gBAAQ;AAAA,MACT;AAAA,IACD;AAEA,WAAO;AAAA,MACN,IAAI,SAAS,UAAU,OAAO,SAAS,SAAS;AAAA,MAChD,QAAQ,SAAS;AAAA,MACjB;AAAA,MACA;AAAA,MACA,SAAS,SAAS;AAAA,IACnB;AAAA,EACD,SAAS,OAAO;AACf,WAAO;AAAA,MACN,IAAI;AAAA,MACJ,QAAQ;AAAA,MACR,MAAM;AAAA,MACN,OAAO;AAAA,MACP,SAAS,CAAC;AAAA,MACV,OAAO,IAAI,MAAM,OAAO,KAAK,CAAC;AAAA,IAC/B;AAAA,EACD;AACD;",
|
||||
"names": []
|
||||
}
|
||||
25
node_modules/pdf-parse/dist/node/cjs/index.d.cts
generated
vendored
Normal file
25
node_modules/pdf-parse/dist/node/cjs/index.d.cts
generated
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
/**
|
||||
* Perform an HTTP HEAD request to retrieve the file size and verify existence;
|
||||
* when `check` is true, fetch a small range and inspect the magic number to confirm the URL points to a valid PDF.
|
||||
* If the range request does not return a 2xx response with a body, `magic` is set to `false`.
|
||||
* @param url - The URL of the PDF file to check. Can be a string or URL object.
|
||||
* @param check - When `true`, download a small byte range (first 4 bytes) to validate the file signature by checking for '%PDF' magic bytes. Default: `false`.
|
||||
* @returns - A Promise that resolves to a HeaderResult object containing the response status, size, headers, and PDF validation result.
|
||||
* @public
|
||||
*/
|
||||
export declare function getHeader(url: string | URL, check?: boolean): Promise<HeaderResult>;
|
||||
|
||||
/**
|
||||
* Result information from getHeader.
|
||||
* @public
|
||||
*/
|
||||
export declare interface HeaderResult {
|
||||
/** `true` when the HEAD response status is in the 200-299 range; `false` otherwise or when the request failed. */
ok: boolean;
|
||||
/** Status code of the HEAD response; `undefined` when the request failed. */
status?: number;
|
||||
/** Value of the HEAD response's `Content-Length` header parsed as a base-10 integer; `undefined` when the header is absent or the request failed. */
size?: number;
|
||||
/** `null` when no signature check was requested; otherwise whether the first bytes equal "%PDF" (`false` when the check could not be made or the request failed). */
magic: boolean | null;
|
||||
/** Headers of the HEAD response, with multi-valued headers joined by ","; an empty object when the request failed. */
headers?: Record<string, string>;
|
||||
/** Set only when the request failed; wraps the stringified thrown value. */
error?: Error;
|
||||
}
|
||||
|
||||
export { }
|
||||
23
node_modules/pdf-parse/dist/node/esm/getHeader.d.ts
generated
vendored
Normal file
23
node_modules/pdf-parse/dist/node/esm/getHeader.d.ts
generated
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
/**
|
||||
* Result information from getHeader.
|
||||
* @public
|
||||
*/
|
||||
export interface HeaderResult {
|
||||
/** `true` when the HEAD response status is in the 200-299 range; `false` otherwise or when the request failed. */
ok: boolean;
|
||||
/** Status code of the HEAD response; `undefined` when the request failed. */
status?: number;
|
||||
/** Value of the HEAD response's `Content-Length` header parsed as a base-10 integer; `undefined` when the header is absent or the request failed. */
size?: number;
|
||||
/** `null` when no signature check was requested; otherwise whether the first bytes equal "%PDF" (`false` when the check could not be made or the request failed). */
magic: boolean | null;
|
||||
/** Headers of the HEAD response, with multi-valued headers joined by ","; an empty object when the request failed. */
headers?: Record<string, string>;
|
||||
/** Set only when the request failed; wraps the stringified thrown value. */
error?: Error;
|
||||
}
|
||||
/**
|
||||
* Perform an HTTP HEAD request to retrieve the file size and verify existence;
|
||||
* when `check` is true, fetch a small range and inspect the magic number to confirm the URL points to a valid PDF.
|
||||
* If the range request does not return a 2xx response with a body, `magic` is set to `false`.
|
||||
* @param url - The URL of the PDF file to check. Can be a string or URL object.
|
||||
* @param check - When `true`, download a small byte range (first 4 bytes) to validate the file signature by checking for '%PDF' magic bytes. Default: `false`.
|
||||
* @returns - A Promise that resolves to a HeaderResult object containing the response status, size, headers, and PDF validation result.
|
||||
* @public
|
||||
*/
|
||||
export declare function getHeader(url: string | URL, check?: boolean): Promise<HeaderResult>;
|
||||
//# sourceMappingURL=getHeader.d.ts.map
|
||||
1
node_modules/pdf-parse/dist/node/esm/getHeader.d.ts.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/node/esm/getHeader.d.ts.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"getHeader.d.ts","sourceRoot":"","sources":["../../../src/node/getHeader.ts"],"names":[],"mappings":"AAGA;;;GAGG;AACH,MAAM,WAAW,YAAY;IAC5B,EAAE,EAAE,OAAO,CAAC;IACZ,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,OAAO,GAAG,IAAI,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,KAAK,CAAC,EAAE,KAAK,CAAC;CACd;AA8BD;;;;;;;;GAQG;AACH,wBAAsB,SAAS,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,EAAE,KAAK,GAAE,OAAe,GAAG,OAAO,CAAC,YAAY,CAAC,CAmChG"}
|
||||
66
node_modules/pdf-parse/dist/node/esm/getHeader.js
generated
vendored
Normal file
66
node_modules/pdf-parse/dist/node/esm/getHeader.js
generated
vendored
Normal file
@@ -0,0 +1,66 @@
|
||||
import * as http from 'node:http';
|
||||
import * as https from 'node:https';
|
||||
/**
 * Send one body-less HTTP(S) request and collect the response.
 *
 * @param u - Target URL; the `https:` protocol selects `https.request`, anything else `http.request`.
 * @param method - HTTP method, e.g. 'HEAD' or 'GET'.
 * @param headers - Optional request headers.
 * @param maxBytes - Stop reading once at least this many body bytes have arrived and tear the
 *   connection down. Defaults to `Infinity` (read the whole body).
 * @returns Resolves with `{ status, headers, buffer }`. Multi-valued response headers are joined
 *   with ','; `buffer` is `undefined` when no body bytes were received and may hold more than
 *   `maxBytes` because whole chunks are kept. Rejects on request errors and when the response
 *   stream fails or closes before the message is complete.
 */
async function nodeRequest(u, method, headers, maxBytes = Infinity) {
    return new Promise((resolve, reject) => {
        const reqFn = u.protocol === 'https:' ? https.request : http.request;
        const req = reqFn(u, { method, headers }, (res) => {
            const headersObj = {};
            for (const [k, v] of Object.entries(res.headers)) {
                headersObj[k] = Array.isArray(v) ? v.join(',') : (v ?? '');
            }
            const chunks = [];
            let received = 0;
            let settled = false;
            const succeed = () => {
                if (settled)
                    return;
                settled = true;
                const buffer = chunks.length ? Buffer.concat(chunks) : undefined;
                resolve({ status: res.statusCode ?? 0, headers: headersObj, buffer });
            };
            const fail = (err) => {
                if (settled)
                    return;
                settled = true;
                reject(err);
            };
            res.on('data', (c) => {
                if (settled)
                    return;
                const chunk = Buffer.from(c);
                chunks.push(chunk);
                received += chunk.length;
                if (received >= maxBytes) {
                    // Enough data for the caller: settle first, then drop the connection so
                    // the rest of the body is never downloaded.
                    succeed();
                    req.destroy();
                }
            });
            res.on('end', succeed);
            // Without these listeners a response that is cut off mid-body never emits 'end'
            // and the promise would stay pending forever.
            res.on('error', fail);
            res.on('close', () => {
                if (res.complete) {
                    succeed();
                }
                else {
                    fail(new Error('Response closed before the body was complete'));
                }
            });
        });
        req.on('error', (err) => reject(err));
        req.end();
    });
}
/**
 * Perform an HTTP HEAD request to retrieve the file size and verify existence;
 * when `check` is true, additionally fetch the first bytes with a ranged GET and
 * compare them with the '%PDF' signature.
 * If the ranged GET does not return a 2xx response with a body, `magic` is set to `false`.
 * @param url - The URL of the PDF file to check. Can be a string or URL object.
 * @param check - When `true`, request the first 4 bytes and validate the file signature by checking for '%PDF' magic bytes.
 *   The download is capped, so a server that ignores `Range` is not read to the end. Default: `false`.
 * @returns - A Promise that resolves to a HeaderResult object containing the response status, size, headers, and PDF validation result.
 *   `size` is `undefined` when `Content-Length` is absent or unparsable. The promise never rejects: failures are
 *   reported as `{ ok: false, magic: false, headers: {}, error }`.
 * @public
 */
export async function getHeader(url, check = false) {
    try {
        const u = typeof url === 'string' ? new URL(url) : url;
        const headResp = await nodeRequest(u, 'HEAD');
        const rawLength = headResp.headers['content-length'];
        const parsedLength = rawLength ? parseInt(rawLength, 10) : NaN;
        // Report an absent or unparsable Content-Length as "unknown" instead of NaN.
        const size = Number.isNaN(parsedLength) ? undefined : parsedLength;
        let magic = null;
        if (check) {
            // 'bytes=0-3' is inclusive, i.e. exactly the four signature bytes.
            const rangeResp = await nodeRequest(u, 'GET', { Range: 'bytes=0-3' }, 4);
            const rangeOk = rangeResp.status >= 200 && rangeResp.status < 300;
            magic = rangeOk && rangeResp.buffer !== undefined
                ? rangeResp.buffer.subarray(0, 4).toString('latin1') === '%PDF'
                : false;
        }
        return {
            ok: headResp.status >= 200 && headResp.status < 300,
            status: headResp.status,
            size,
            magic,
            headers: headResp.headers,
        };
    }
    catch (error) {
        return {
            ok: false,
            status: undefined,
            size: undefined,
            magic: false,
            headers: {},
            error: new Error(String(error)),
        };
    }
}
|
||||
//# sourceMappingURL=getHeader.js.map
|
||||
1
node_modules/pdf-parse/dist/node/esm/getHeader.js.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/node/esm/getHeader.js.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"getHeader.js","sourceRoot":"","sources":["../../../src/node/getHeader.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAClC,OAAO,KAAK,KAAK,MAAM,YAAY,CAAC;AAqBpC,KAAK,UAAU,WAAW,CAAC,CAAM,EAAE,MAAc,EAAE,OAAgC;IAClF,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACtC,MAAM,KAAK,GAAG,CAAC,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC;QACrE,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE,EAAE,CAAC,GAAG,EAAE,EAAE;YACjD,MAAM,UAAU,GAA2B,EAAE,CAAC;YAC9C,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;gBAClD,UAAU,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;YAC5D,CAAC;YAED,MAAM,MAAM,GAAa,EAAE,CAAC;YAC5B,GAAG,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACnD,GAAG,CAAC,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE;gBAClB,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;gBACjE,OAAO,CAAC,EAAE,MAAM,EAAE,GAAG,CAAC,UAAU,IAAI,CAAC,EAAE,OAAO,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC,CAAC;YACvE,CAAC,CAAC,CAAC;QACJ,CAAC,CAAC,CAAC;QAEH,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;QACtC,GAAG,CAAC,GAAG,EAAE,CAAC;IACX,CAAC,CAAC,CAAC;AACJ,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,GAAiB,EAAE,QAAiB,KAAK;IACxE,IAAI,CAAC;QACJ,MAAM,CAAC,GAAG,OAAO,GAAG,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;QAEvD,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;QAC9C,MAAM,IAAI,GAAG,QAAQ,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,gBAAgB,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QAE/G,IAAI,KAAK,GAAmB,IAAI,CAAC;QACjC,IAAI,KAAK,EAAE,CAAC;YACX,MAAM,SAAS,GAAG,MAAM,WAAW,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,KAAK,EAAE,WAAW,EAAE,CAAC,CAAC;YACtE,IAAI,SAAS,CAAC,MAAM,IAAI,GAAG,IAAI,SAAS,CAAC,
MAAM,GAAG,GAAG,IAAI,SAAS,CAAC,MAAM,EAAE,CAAC;gBAC3E,MAAM,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;gBAChE,KAAK,GAAG,SAAS,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;YACtC,CAAC;iBAAM,CAAC;gBACP,KAAK,GAAG,KAAK,CAAC;YACf,CAAC;QACF,CAAC;QAED,OAAO;YACN,EAAE,EAAE,QAAQ,CAAC,MAAM,IAAI,GAAG,IAAI,QAAQ,CAAC,MAAM,GAAG,GAAG;YACnD,MAAM,EAAE,QAAQ,CAAC,MAAM;YACvB,IAAI;YACJ,KAAK;YACL,OAAO,EAAE,QAAQ,CAAC,OAAO;SACzB,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAChB,OAAO;YACN,EAAE,EAAE,KAAK;YACT,MAAM,EAAE,SAAS;YACjB,IAAI,EAAE,SAAS;YACf,KAAK,EAAE,KAAK;YACZ,OAAO,EAAE,EAAE;YACX,KAAK,EAAE,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;SAC/B,CAAC;IACH,CAAC;AACF,CAAC"}
|
||||
3
node_modules/pdf-parse/dist/node/esm/index.d.ts
generated
vendored
Normal file
3
node_modules/pdf-parse/dist/node/esm/index.d.ts
generated
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
export type * from './getHeader.js';
|
||||
export * from './getHeader.js';
|
||||
//# sourceMappingURL=index.d.ts.map
|
||||
1
node_modules/pdf-parse/dist/node/esm/index.d.ts.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/node/esm/index.d.ts.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/node/index.ts"],"names":[],"mappings":"AAAA,mBAAmB,gBAAgB,CAAC;AACpC,cAAc,gBAAgB,CAAC"}
|
||||
2
node_modules/pdf-parse/dist/node/esm/index.js
generated
vendored
Normal file
2
node_modules/pdf-parse/dist/node/esm/index.js
generated
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
export * from './getHeader.js';
|
||||
//# sourceMappingURL=index.js.map
|
||||
1
node_modules/pdf-parse/dist/node/esm/index.js.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/node/esm/index.js.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/node/index.ts"],"names":[],"mappings":"AACA,cAAc,gBAAgB,CAAC"}
|
||||
2
node_modules/pdf-parse/dist/pdf-parse/cjs/index.cjs
generated
vendored
Normal file
2
node_modules/pdf-parse/dist/pdf-parse/cjs/index.cjs
generated
vendored
Normal file
File diff suppressed because one or more lines are too long
1
node_modules/pdf-parse/dist/pdf-parse/cjs/index.cjs.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/cjs/index.cjs.map
generated
vendored
Normal file
File diff suppressed because one or more lines are too long
898
node_modules/pdf-parse/dist/pdf-parse/cjs/index.d.cts
generated
vendored
Normal file
898
node_modules/pdf-parse/dist/pdf-parse/cjs/index.d.cts
generated
vendored
Normal file
@@ -0,0 +1,898 @@
|
||||
import type { DocumentInitParameters } from 'pdfjs-dist/types/src/display/api.js';
|
||||
import type { ImageKind } from 'pdfjs-dist/legacy/build/pdf.mjs';
|
||||
import { Metadata } from 'pdfjs-dist/types/src/display/metadata.js';
|
||||
import type { PDFDataRangeTransport } from 'pdfjs-dist/types/src/display/api.js';
|
||||
import type { PDFWorker } from 'pdfjs-dist/types/src/display/api.js';
|
||||
import { VerbosityLevel } from 'pdfjs-dist/legacy/build/pdf.mjs';
|
||||
|
||||
/**
|
||||
* Error used to indicate that an operation was aborted (for example by an AbortSignal).
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
export declare class AbortException extends Error {
|
||||
/**
|
||||
* Create a new AbortException.
|
||||
* @param message - Optional error message.
|
||||
* @param cause - Optional underlying cause.
|
||||
*/
|
||||
constructor(message?: string, cause?: unknown);
|
||||
}
|
||||
|
||||
/**
|
||||
* @public
|
||||
* Consolidated date information gathered from different PDF sources.
|
||||
* The PDF 'Info' dictionary contains CreationDate / ModDate and
|
||||
* the XMP/XAP metadata can contain several timestamps as well. This
|
||||
* structure collects those values (if present) as JavaScript Date objects
|
||||
* or null when the property exists but cannot be parsed.
|
||||
*/
|
||||
export declare type DateNode = {
|
||||
CreationDate?: Date | null;
|
||||
ModDate?: Date | null;
|
||||
XmpCreateDate?: Date | null;
|
||||
XmpModifyDate?: Date | null;
|
||||
XmpMetadataDate?: Date | null;
|
||||
XapCreateDate?: Date | null;
|
||||
XapModifyDate?: Date | null;
|
||||
XapMetadataDate?: Date | null;
|
||||
};
|
||||
|
||||
/**
|
||||
* @public
|
||||
* EmbeddedImage
|
||||
* - Normalized representation of an embedded image extracted from the PDF.
|
||||
* - `data`: Raw image bytes (e.g. PNG/JPEG) as Uint8Array. Use this for file writing or binary processing.
|
||||
* - `dataUrl`: Optional data URL (e.g. "data:image/png;base64,...") for directly embedding in <img> src.
|
||||
* Storing both lets consumers choose the most convenient form; consider omitting one to save memory.
|
||||
* - `name`: Resource name for the image.
|
||||
* - `width` / `height`: Dimensions in pixels.
|
||||
* - `kind`: ImageKindValue indicating the pixel format (e.g. GRAYSCALE_1BPP / RGB_24BPP / RGBA_32BPP).
|
||||
*/
|
||||
export declare interface EmbeddedImage {
|
||||
data: Uint8Array;
|
||||
dataUrl: string;
|
||||
name: string;
|
||||
width: number;
|
||||
height: number;
|
||||
kind: ImageKindValue;
|
||||
}
|
||||
|
||||
/**
|
||||
* Error thrown when the PDF structure/contents are malformed and cannot be parsed.
|
||||
*
|
||||
* This is raised for low-level format problems detected while reading PDF objects.
|
||||
* Errors caused during parsing PDF data.
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
export declare class FormatError extends Error {
|
||||
/**
|
||||
* Create a new FormatError.
|
||||
* @param message - Optional message describing the format problem.
|
||||
* @param cause - Optional underlying cause.
|
||||
*/
|
||||
constructor(message?: string, cause?: unknown);
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalize arbitrary thrown values into an Error instance used by the library.
|
||||
*
|
||||
* Known Error instances with specific names are mapped to the library's
|
||||
* typed exceptions in order to preserve type information and any additional
|
||||
* fields (for example `details`, `status`, etc.). If the value is not an
|
||||
* Error it is converted to a generic Error containing the stringified value.
|
||||
*
|
||||
* @public
|
||||
* @param error - The thrown value to normalize.
|
||||
* @returns An Error instance representing the provided value.
|
||||
*/
|
||||
export declare function getException(error: unknown): Error;
|
||||
|
||||
/**
|
||||
* @public
|
||||
* ImageKindKey
|
||||
* - Represents the keys of the ImageKind enum (e.g. "GRAYSCALE_1BPP", "RGB_24BPP", "RGBA_32BPP").
|
||||
*/
|
||||
export declare type ImageKindKey = keyof typeof ImageKind;
|
||||
|
||||
/**
|
||||
* @public
|
||||
* ImageKindValue
|
||||
* - Represents the numeric values of the ImageKind enum (e.g. 1, 2, 3).
|
||||
*/
|
||||
export declare type ImageKindValue = (typeof ImageKind)[ImageKindKey];
|
||||
|
||||
/**
|
||||
* @public
|
||||
* ImageResult
|
||||
* Helper container for extracted images grouped per page.
|
||||
*/
|
||||
export declare class ImageResult {
|
||||
pages: Array<PageImages>;
|
||||
total: number;
|
||||
getPageImage(num: number, name: string): EmbeddedImage | null;
|
||||
constructor(total: number);
|
||||
}
|
||||
|
||||
/**
|
||||
* @public
|
||||
* Aggregated information about a PDF document returned by getInfo().
|
||||
* The object contains high-level metadata, outline/bookmark structure,
|
||||
* per-page extracted hyperlinks and utility helpers for parsing dates.
|
||||
*/
|
||||
export declare class InfoResult {
|
||||
total: number;
|
||||
/**
|
||||
* The PDF 'Info' dictionary. Typical fields include title, author, subject,
|
||||
* Creator, Producer and Creation/Modification dates. The exact structure is
|
||||
* determined by the PDF and as returned by PDF.js.
|
||||
*/
|
||||
info?: any;
|
||||
metadata?: Metadata;
|
||||
/**
|
||||
* An array of document fingerprint strings provided by PDF.js. Useful
|
||||
* for caching, de-duplication or identifying a document across runs.
|
||||
*/
|
||||
fingerprints?: Array<string | null>;
|
||||
/**
|
||||
* Permission flags for the document as returned by PDF.js (or null).
|
||||
* These flags indicate capabilities such as printing, copying and
|
||||
* other restrictions imposed by the PDF security settings.
|
||||
*/
|
||||
permission?: number[] | null;
|
||||
/**
|
||||
* Optional document outline (bookmarks). When present this is the
|
||||
* hierarchical navigation structure which viewers use for quick access.
|
||||
*/
|
||||
outline?: Array<OutlineNode> | null;
|
||||
pages: Array<PageLinkResult>;
|
||||
/**
|
||||
* Collects dates from different sources (Info dictionary and XMP/XAP metadata)
|
||||
* and returns them as a DateNode where available. This helps callers compare
|
||||
* and choose the most relevant timestamp (for example a creation date vs XMP date).
|
||||
*/
|
||||
getDateNode(): DateNode;
|
||||
/**
|
||||
* Try to parse an ISO-8601 date string from XMP/XAP metadata. If the
|
||||
* value is falsy or cannot be parsed, undefined is returned to indicate
|
||||
* absence or unparsable input.
|
||||
*/
|
||||
private parseISODateString;
|
||||
constructor(total: number);
|
||||
}
|
||||
|
||||
/**
|
||||
* Error thrown when the parsed data is not a valid PDF document.
|
||||
*
|
||||
* Use this exception to signal that the input cannot be interpreted as a PDF
|
||||
* (corrupt file, invalid header, etc.).
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
export declare class InvalidPDFException extends Error {
|
||||
/**
|
||||
* Create a new InvalidPDFException.
|
||||
* @param message - Optional error message.
|
||||
* @param cause - Optional underlying cause (preserved on modern runtimes).
|
||||
*/
|
||||
constructor(message?: string, cause?: unknown);
|
||||
}
|
||||
|
||||
export declare class Line extends Shape {
|
||||
from: Point;
|
||||
to: Point;
|
||||
direction: LineDirection;
|
||||
length: number;
|
||||
intersections: Array<Point>;
|
||||
gaps: Array<Line>;
|
||||
constructor(from: Point, to: Point);
|
||||
private init;
|
||||
private _valid;
|
||||
get valid(): boolean;
|
||||
get normalized(): Line;
|
||||
addGap(line: Line): void;
|
||||
containsPoint(p: Point): boolean;
|
||||
addIntersectionPoint(point: Point): void;
|
||||
intersection(line: Line): Point | undefined;
|
||||
transform(matrix: Array<number>): this;
|
||||
}
|
||||
|
||||
export declare enum LineDirection {
|
||||
None = 0,
|
||||
Horizontal = 1,
|
||||
Vertical = 2
|
||||
}
|
||||
|
||||
export declare class LineStore {
|
||||
hLines: Array<Line>;
|
||||
vLines: Array<Line>;
|
||||
add(line: Line): void;
|
||||
addRectangle(rect: Rectangle): void;
|
||||
getTableData(): Array<TableData>;
|
||||
getTables(): Array<Table>;
|
||||
normalize(): void;
|
||||
normalizeHorizontal(): void;
|
||||
normalizeVertical(): void;
|
||||
private fillTable;
|
||||
private tryFill;
|
||||
private margeHorizontalLines;
|
||||
private margeVerticalLines;
|
||||
}
|
||||
|
||||
/**
|
||||
* @public
|
||||
* LoadParameters
|
||||
* PDF loading parameters.
|
||||
*/
|
||||
export declare interface LoadParameters extends DocumentInitParameters {
|
||||
/**
|
||||
* The URL of the PDF.
|
||||
* Default: `undefined`.
|
||||
*/
|
||||
url?: string | URL | undefined;
|
||||
/**
|
||||
* Binary PDF data.
|
||||
* Use TypedArrays (e.g., `Uint8Array`) to improve memory usage. If PDF data is BASE64-encoded, use `atob()` to convert it to a binary string first.
|
||||
* **NOTE**: If TypedArrays are used, they will generally be transferred to the worker thread, reducing main-thread memory usage but taking ownership of the array.
|
||||
* Default: `undefined`.
|
||||
*/
|
||||
data?: string | number[] | ArrayBuffer | TypedArray | undefined;
|
||||
/**
|
||||
* Basic authentication headers.
|
||||
* Default: `undefined`.
|
||||
*/
|
||||
httpHeaders?: Object | undefined;
|
||||
/**
|
||||
* Indicates whether cross-site Access-Control requests should be made using credentials (e.g., cookies or auth headers).
|
||||
* Default: `false`.
|
||||
*/
|
||||
withCredentials?: boolean | undefined;
|
||||
/**
|
||||
* For decrypting password-protected PDFs.
|
||||
* Default: `undefined`.
|
||||
*/
|
||||
password?: string | undefined;
|
||||
/**
|
||||
* The PDF file length. Used for progress reports and range requests.
|
||||
* Default: `undefined`.
|
||||
*/
|
||||
length?: number | undefined;
|
||||
/**
|
||||
* Allows using a custom range transport implementation.
|
||||
* Default: `undefined`.
|
||||
*/
|
||||
range?: PDFDataRangeTransport | undefined;
|
||||
/**
|
||||
* Maximum number of bytes fetched per range request.
|
||||
* Default: `65536` (`2^16`).
|
||||
*/
|
||||
rangeChunkSize?: number | undefined;
|
||||
/**
|
||||
* The worker used for loading and parsing PDF data.
|
||||
* Default: `undefined`.
|
||||
*/
|
||||
worker?: PDFWorker | undefined;
|
||||
/**
|
||||
* Controls logging level; use constants from `VerbosityLevel`.
|
||||
* Default: `undefined`.
|
||||
*/
|
||||
verbosity?: number | undefined;
|
||||
/**
|
||||
* Base URL of the document, used to resolve relative URLs in annotations and outline items.
|
||||
* Default: `undefined`.
|
||||
*/
|
||||
docBaseUrl?: string | undefined;
|
||||
/**
|
||||
* URL where predefined Adobe CMaps are located. Include trailing slash.
|
||||
* Default: `undefined`.
|
||||
*/
|
||||
cMapUrl?: string | undefined;
|
||||
/**
|
||||
* Specifies if Adobe CMaps are binary-packed.
|
||||
* Default: `true`.
|
||||
*/
|
||||
cMapPacked?: boolean | undefined;
|
||||
/**
|
||||
* Factory for reading built-in CMap files.
|
||||
* Default: `{DOMCMapReaderFactory}`.
|
||||
*/
|
||||
CMapReaderFactory?: Object | undefined;
|
||||
/**
|
||||
* URL where predefined ICC profiles are located. Include trailing slash.
|
||||
* Default: `undefined`.
|
||||
*/
|
||||
iccUrl?: string | undefined;
|
||||
/**
|
||||
* If `true`, non-embedded fonts fall back to system fonts.
|
||||
* Default: `true` in browsers, `false` in Node.js (unless `disableFontFace === true`, then always `false`).
|
||||
*/
|
||||
useSystemFonts?: boolean | undefined;
|
||||
/**
|
||||
* URL for standard font files. Include trailing slash.
|
||||
* Default: `undefined`.
|
||||
*/
|
||||
standardFontDataUrl?: string | undefined;
|
||||
/**
|
||||
* Factory for reading standard font files.
|
||||
* Default: `{DOMStandardFontDataFactory}`.
|
||||
*/
|
||||
StandardFontDataFactory?: Object | undefined;
|
||||
/**
|
||||
* URL for WebAssembly files. Include trailing slash.
|
||||
* Default: `undefined`.
|
||||
*/
|
||||
wasmUrl?: string | undefined;
|
||||
/**
|
||||
* Factory for reading WASM files.
|
||||
* Default: `{DOMWasmFactory}`.
|
||||
*/
|
||||
WasmFactory?: Object | undefined;
|
||||
/**
|
||||
* Enable `fetch()` in worker thread for CMap/font/WASM files. If `true`, factory options are ignored.
|
||||
* Default: `true` in browsers, `false` in Node.js.
|
||||
*/
|
||||
useWorkerFetch?: boolean | undefined;
|
||||
/**
|
||||
* Attempt to use WebAssembly for better performance (e.g., image decoding).
|
||||
* Default: `true`.
|
||||
*/
|
||||
useWasm?: boolean | undefined;
|
||||
/**
|
||||
* Reject promises (e.g., `getTextContent`) on parse errors instead of recovering partially.
|
||||
* Default: `false`.
|
||||
*/
|
||||
stopAtErrors?: boolean | undefined;
|
||||
/**
|
||||
* Max image size in total pixels (`width * height`). Use `-1` for no limit.
|
||||
* Default: `-1`.
|
||||
*/
|
||||
maxImageSize?: number | undefined;
|
||||
/**
|
||||
* Whether evaluating strings as JS is allowed (for PDF function performance).
|
||||
* Default: `true`.
|
||||
*/
|
||||
isEvalSupported?: boolean | undefined;
|
||||
/**
|
||||
* Whether `OffscreenCanvas` can be used in worker.
|
||||
* Default: `true` in browsers, `false` in Node.js.
|
||||
*/
|
||||
isOffscreenCanvasSupported?: boolean | undefined;
|
||||
/**
|
||||
* Whether `ImageDecoder` can be used in worker.
|
||||
* Default: `true` in browsers, `false` in Node.js.
|
||||
* **NOTE**: Temporarily disabled in Chromium due to bugs:
|
||||
* - Crashes with BMP decoder on huge images ([issue 374807001](https://issues.chromium.org/issues/374807001))
|
||||
* - Broken JPEGs with custom color profiles ([issue 378869810](https://issues.chromium.org/issues/378869810))
|
||||
*/
|
||||
isImageDecoderSupported?: boolean | undefined;
|
||||
/**
|
||||
* Used to determine when to resize images (via `OffscreenCanvas`). Use `-1` to use a slower fallback algorithm.
|
||||
* Default: `undefined`.
|
||||
*/
|
||||
canvasMaxAreaInBytes?: number | undefined;
|
||||
/**
|
||||
* Disable `@font-face`/Font Loading API; use built-in glyph renderer instead.
|
||||
* Default: `false` in browsers, `true` in Node.js.
|
||||
*/
|
||||
disableFontFace?: boolean | undefined;
|
||||
/**
|
||||
* Include extra (non-rendering) font properties when exporting font data from worker. Increases memory usage.
|
||||
* Default: `false`.
|
||||
*/
|
||||
fontExtraProperties?: boolean | undefined;
|
||||
/**
|
||||
* Render XFA forms if present.
|
||||
* Default: `false`.
|
||||
*/
|
||||
enableXfa?: boolean | undefined;
|
||||
/**
|
||||
* Explicit document context for creating elements and loading resources. Defaults to current document.
|
||||
* Default: `undefined`.
|
||||
*/
|
||||
ownerDocument?: HTMLDocument | undefined;
|
||||
/**
|
||||
* Disable range requests for PDF loading.
|
||||
* Default: `false`.
|
||||
*/
|
||||
disableRange?: boolean | undefined;
|
||||
/**
|
||||
* Disable streaming PDF data.
|
||||
* Default: `false`.
|
||||
*/
|
||||
disableStream?: boolean | undefined;
|
||||
/**
|
||||
* Disable pre-fetching of PDF data. Requires `disableStream: true` to work fully.
|
||||
* Default: `false`.
|
||||
*/
|
||||
disableAutoFetch?: boolean | undefined;
|
||||
/**
|
||||
* Enable debugging hooks (see `web/debugger.js`).
|
||||
* Default: `false`.
|
||||
*/
|
||||
pdfBug?: boolean | undefined;
|
||||
/**
|
||||
* Factory for creating canvases.
|
||||
* Default: `{DOMCanvasFactory}`.
|
||||
*/
|
||||
CanvasFactory?: Object | undefined;
|
||||
/**
|
||||
* Factory for creating SVG filters during rendering.
|
||||
* Default: `{DOMFilterFactory}`.
|
||||
*/
|
||||
FilterFactory?: Object | undefined;
|
||||
/**
|
||||
* Enable hardware acceleration for rendering.
|
||||
* Default: `false`.
|
||||
*/
|
||||
enableHWA?: boolean | undefined;
|
||||
}
|
||||
|
||||
export { Metadata }
|
||||
|
||||
/**
|
||||
* @public
|
||||
* Node representing a single item in the PDF outline (bookmarks).
|
||||
* This mirrors the structure returned by PDF.js' getOutline() API.
|
||||
*/
|
||||
export declare interface OutlineNode {
|
||||
title: string;
|
||||
bold: boolean;
|
||||
italic: boolean;
|
||||
color: Uint8ClampedArray;
|
||||
dest: string | Array<any> | null;
|
||||
url: string | null;
|
||||
unsafeUrl?: string;
|
||||
newWindow?: boolean;
|
||||
count?: number;
|
||||
items: Array<any>;
|
||||
}
|
||||
|
||||
/**
|
||||
* @public
|
||||
* PageImages
|
||||
* - Represents all embedded images found on a single PDF page.
|
||||
* - pageNumber: 1-based page index.
|
||||
* - images: Array of EmbeddedImage objects for this page.
|
||||
*/
|
||||
export declare interface PageImages {
|
||||
pageNumber: number;
|
||||
images: EmbeddedImage[];
|
||||
}
|
||||
|
||||
/**
|
||||
* @public
|
||||
* Per-page link extraction result.
|
||||
* - pageNumber: the physical page index (1-based) within the PDF document.
|
||||
* - pageLabel: optional printed page label shown by PDF viewers (e.g. "iii", "1", "A-1");
|
||||
* this can differ from the physical page number and may be undefined
|
||||
* when the document does not provide labels.
|
||||
* - links: array of text->URL mappings that were found/overlaid on the page.
|
||||
* - width/height: page dimensions in PDF units for the viewport used.
|
||||
*/
|
||||
export declare type PageLinkResult = {
|
||||
pageNumber: number;
|
||||
pageLabel?: string | null;
|
||||
links: Array<{
|
||||
text: string;
|
||||
url: string;
|
||||
}>;
|
||||
width: number;
|
||||
height: number;
|
||||
};
|
||||
|
||||
/**
|
||||
* @public
|
||||
* PageTableResult
|
||||
*/
|
||||
export declare interface PageTableResult {
|
||||
num: number;
|
||||
tables: TableArray[];
|
||||
}
|
||||
|
||||
/**
|
||||
* @public
|
||||
* PageTextResult
|
||||
*/
|
||||
export declare interface PageTextResult {
|
||||
num: number;
|
||||
text: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* @public
|
||||
* ParseParameters
|
||||
* Options to control parsing behavior and output formatting.
|
||||
*/
|
||||
export declare interface ParseParameters {
|
||||
/**
|
||||
* Array of page numbers to parse.
|
||||
* When provided, only these pages will be parsed and returned in the same order.
|
||||
* Example: [1, 3, 5]. Parse only one page: [7].
|
||||
* Default: `undefined`.
|
||||
*/
|
||||
partial?: Array<number>;
|
||||
/**
|
||||
* Parse the first N pages (pages 1..N).
|
||||
* Ignored when `partial` is provided. If both `first` and `last` are set, they define
|
||||
* an explicit inclusive page range (first..last) and this "first N" semantics is ignored.
|
||||
* Default: `undefined`.
|
||||
*/
|
||||
first?: number;
|
||||
/**
|
||||
* Parse the last N pages (pages total-N+1..total).
|
||||
* Ignored when `partial` is provided. If both `first` and `last` are set, they define
|
||||
* an explicit inclusive page range (first..last) and this "last N" semantics is ignored.
|
||||
* Default: `undefined`.
|
||||
*/
|
||||
last?: number;
|
||||
/**
|
||||
* Collect per-page metadata such as embedded links, title, pageLabel, and dimensions;
|
||||
* extraction of ISBN, DOI, abstract, and references via getInfo() is still a work in progress.
|
||||
* Default: `false`.
|
||||
*/
|
||||
parsePageInfo?: boolean;
|
||||
/**
|
||||
* Attempt to detect and include hyperlink annotations (e.g. URLs) associated with text.
|
||||
* Detected links are formatted as Markdown inline links (for example: [text](https://example.com)).
|
||||
* Default: `false`.
|
||||
*/
|
||||
parseHyperlinks?: boolean;
|
||||
/**
|
||||
* Enforce logical line breaks by inserting a newline when the vertical distance
|
||||
* between text items exceeds `lineThreshold`.
|
||||
* Useful to preserve paragraph/line structure when text items are emitted as separate segments.
|
||||
* Default: `true`.
|
||||
*/
|
||||
lineEnforce?: boolean;
|
||||
/**
|
||||
* Threshold to decide whether nearby text items belong to different lines.
|
||||
* Larger values make the parser more likely to start a new line between items.
|
||||
* Default: `4.6`.
|
||||
*/
|
||||
lineThreshold?: number;
|
||||
/**
|
||||
* String inserted between text items on the same line when a sufficiently large horizontal gap is detected.
|
||||
* Typically used to emulate a cell/column separator (for example, "\\t" for tabs).
|
||||
* Default: `'\t'`.
|
||||
*/
|
||||
cellSeparator?: string;
|
||||
/**
|
||||
* Horizontal distance threshold to decide when two text items on the same baseline should be treated as separate cells.
|
||||
* Larger value produces fewer (wider) cells; smaller value creates more cell breaks.
|
||||
* Default: `7`.
|
||||
*/
|
||||
cellThreshold?: number;
|
||||
/**
|
||||
* Optional string appended at the end of each page's extracted text to mark page boundaries.
|
||||
* Supports placeholders `page_number` and `total_number` which are substituted accordingly.
|
||||
* If omitted or empty, no page boundary marker is added.
|
||||
* Default: `'\n-- page_number of total_number --'`.
|
||||
*/
|
||||
pageJoiner?: string;
|
||||
/**
|
||||
* Optional string used to join text items when returning a page's text.
|
||||
* If provided, this value is used instead of the default empty-string joining behavior.
|
||||
* Default: `undefined`.
|
||||
*/
|
||||
itemJoiner?: string;
|
||||
/**
|
||||
* Minimum image dimension (in pixels) for width or height.
|
||||
* When set, images whose width OR height is less than or equal to this value are ignored by `getImage()`.
|
||||
* Useful for excluding tiny decorative or tracking images.
|
||||
* Default: `80`.
|
||||
* Disable: `0`.
|
||||
*/
|
||||
imageThreshold?: number;
|
||||
/**
|
||||
* Screenshot scale factor: use 1 for the original size, 1.5 for a 50% larger image, etc.
|
||||
* Default: `1`.
|
||||
*/
|
||||
scale?: number;
|
||||
/**
|
||||
* Desired screenshot width in pixels.
|
||||
* When set, the scale option is ignored.
|
||||
* Default: `undefined`.
|
||||
*/
|
||||
desiredWidth?: number;
|
||||
/**
|
||||
* Applies to both getImage() and getScreenshot(): include the image as a base64 data URL string.
|
||||
* Default: `true`.
|
||||
*/
|
||||
imageDataUrl?: boolean;
|
||||
/**
|
||||
* Applies to both getImage() and getScreenshot(): include the image as a binary buffer.
|
||||
* Default: `true`.
|
||||
*/
|
||||
imageBuffer?: boolean;
|
||||
/**
|
||||
* Include marked content items in the items array of TextContent to capture PDF "marked content".
|
||||
* Enables tags (MCID, role/props) and structural/accessibility information useful for mapping text ↔ structure.
|
||||
* For plain text extraction it's usually false (trade-off: larger output).
|
||||
* Default: `false`.
|
||||
*/
|
||||
includeMarkedContent?: boolean;
|
||||
/**
|
||||
* When true, text normalization is NOT performed in the worker thread.
|
||||
* For plain text extraction, normalizing in the worker (false) is usually recommended.
|
||||
* Default: `false`.
|
||||
*/
|
||||
disableNormalization?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Error indicating a PDF file requires a password or the provided password is incorrect.
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
export declare class PasswordException extends Error {
|
||||
/**
|
||||
* Create a new PasswordException.
|
||||
* @param message - Optional error message.
|
||||
* @param cause - Optional underlying cause.
|
||||
*/
|
||||
constructor(message?: string, cause?: unknown);
|
||||
}
|
||||
|
||||
export { PDFDataRangeTransport }
|
||||
|
||||
/**
|
||||
* @public
|
||||
* Loads PDF documents and exposes helpers for text, image, table, metadata, and screenshot extraction.
|
||||
*/
|
||||
export declare class PDFParse {
|
||||
private readonly options;
|
||||
private doc;
|
||||
progress: {
|
||||
loaded: number;
|
||||
total: number;
|
||||
};
|
||||
/**
|
||||
* Create a new parser with `LoadParameters`.
|
||||
* Converts Node.js `Buffer` data to `Uint8Array` automatically and ensures a default verbosity level.
|
||||
* @param options - Initialization parameters.
|
||||
*/
|
||||
constructor(options: LoadParameters);
|
||||
destroy(): Promise<void>;
|
||||
static get isNodeJS(): boolean;
|
||||
static setWorker(workerSrc?: string): string;
|
||||
/**
|
||||
* Load document-level metadata (info, outline, permissions, page labels) and optionally gather per-page link details.
|
||||
* @param params - Parse options; set `parsePageInfo` to collect per-page metadata described in `ParseParameters`.
|
||||
* @returns Aggregated document metadata in an `InfoResult`.
|
||||
*/
|
||||
getInfo(params?: ParseParameters): Promise<InfoResult>;
|
||||
private getPageLinks;
|
||||
/**
|
||||
* Extract plain text for each requested page, optionally enriching hyperlinks and enforcing line or cell separators.
|
||||
* @param params - Parse options controlling pagination, link handling, and line/cell thresholds.
|
||||
* @returns A `TextResult` containing page-wise text and a concatenated document string.
|
||||
*/
|
||||
getText(params?: ParseParameters): Promise<TextResult>;
|
||||
private load;
|
||||
private shouldParse;
|
||||
private getPageText;
|
||||
private getHyperlinks;
|
||||
/**
|
||||
* Extract embedded images from requested pages.
|
||||
*
|
||||
* Behavior notes:
|
||||
* - Pages are selected according to ParseParameters (partial, first, last).
|
||||
* - Images smaller than `params.imageThreshold` (width OR height) are skipped.
|
||||
* - Returned ImageResult contains per-page PageImages; each image entry includes:
|
||||
* - data: Uint8Array (present when params.imageBuffer === true)
|
||||
* - dataUrl: string (present when params.imageDataUrl === true)
|
||||
* - width, height, kind, name
|
||||
* - Works in both Node.js (canvas.toBuffer) and browser (canvas.toDataURL) environments.
|
||||
*
|
||||
* @param params - ParseParameters controlling page selection, thresholds and output format.
|
||||
* @returns Promise<ImageResult> with extracted images grouped by page.
|
||||
*/
|
||||
getImage(params?: ParseParameters): Promise<ImageResult>;
|
||||
private convertToRGBA;
|
||||
private resolveEmbeddedImage;
|
||||
/**
|
||||
* Render pages to raster screenshots.
|
||||
*
|
||||
* Behavior notes:
|
||||
* - Pages are selected according to ParseParameters (partial, first, last).
|
||||
* - Use params.scale for zoom; if params.desiredWidth is specified it takes precedence.
|
||||
* - Each ScreenshotResult page contains:
|
||||
* - data: Uint8Array (when params.imageBuffer === true)
|
||||
* - dataUrl: string (when params.imageDataUrl === true)
|
||||
* - pageNumber, width, height, scale
|
||||
* - Works in both Node.js (canvas.toBuffer) and browser (canvas.toDataURL) environments.
|
||||
*
|
||||
* @param parseParams - ParseParameters controlling page selection and render options.
|
||||
* @returns Promise<ScreenshotResult> with rendered page images.
|
||||
*/
|
||||
getScreenshot(parseParams?: ParseParameters): Promise<ScreenshotResult>;
|
||||
/**
|
||||
* Detect and extract tables from pages by analysing vector drawing operators, then populate cells with text.
|
||||
*
|
||||
* Behavior notes:
|
||||
* - Scans operator lists for rectangles/lines that form table grids (uses PathGeometry and LineStore).
|
||||
* - Normalizes detected geometry and matches positioned text to table cells.
|
||||
* - Honors ParseParameters for page selection.
|
||||
*
|
||||
* @param params - ParseParameters controlling which pages to analyse (partial/first/last).
|
||||
* @returns Promise<TableResult> containing discovered tables per page.
|
||||
*/
|
||||
getTable(params?: ParseParameters): Promise<TableResult>;
|
||||
private getPathGeometry;
|
||||
private getPageTables;
|
||||
private fillPageTables;
|
||||
}
|
||||
|
||||
export { PDFWorker }
|
||||
|
||||
export declare class Point extends Shape {
|
||||
x: number;
|
||||
y: number;
|
||||
constructor(x: number, y: number);
|
||||
equal(point: Point): boolean;
|
||||
transform(matrix: Array<number>): this;
|
||||
}
|
||||
|
||||
export declare class Rectangle extends Shape {
|
||||
from: Point;
|
||||
width: number;
|
||||
height: number;
|
||||
constructor(from: Point, width: number, height: number);
|
||||
get to(): Point;
|
||||
getLines(): Line[];
|
||||
transform(matrix: Array<number>): this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents an HTTP/network response error encountered while fetching PDF data.
|
||||
*
|
||||
* The `status` and `missing` properties mirror values that may be provided
|
||||
* by the underlying PDF library's network layer.
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
export declare class ResponseException extends Error {
|
||||
/**
|
||||
* Create a new ResponseException.
|
||||
* @param message - Optional error message.
|
||||
* @param status - Optional numeric HTTP/status code.
|
||||
* @param missing - Optional field describing missing resources.
|
||||
* @param cause - Optional underlying cause.
|
||||
*/
|
||||
constructor(message?: string, status?: number, missing?: unknown, cause?: unknown);
|
||||
}
|
||||
|
||||
/**
|
||||
* @public
|
||||
* SafeParseParameters
|
||||
*/
|
||||
export declare type SafeParseParameters = Required<Pick<ParseParameters, 'lineThreshold' | 'cellThreshold' | 'scale'>> & ParseParameters;
|
||||
|
||||
/**
|
||||
* @public
|
||||
* Screenshot
|
||||
*/
|
||||
export declare interface Screenshot {
|
||||
data: Uint8Array;
|
||||
dataUrl: string;
|
||||
pageNumber: number;
|
||||
width: number;
|
||||
height: number;
|
||||
scale: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* @public
|
||||
* ScreenshotResult
|
||||
*/
|
||||
export declare class ScreenshotResult {
|
||||
pages: Array<Screenshot>;
|
||||
total: number;
|
||||
constructor(total: number);
|
||||
}
|
||||
|
||||
export declare function setDefaultParseParameters(params: ParseParameters): SafeParseParameters;
|
||||
|
||||
export declare abstract class Shape {
|
||||
static tolerance: number;
|
||||
abstract transform(matrix: Array<number>): this;
|
||||
static applyTransform(p: Array<number>, m: Array<number>): Array<number>;
|
||||
}
|
||||
|
||||
export declare class Table {
|
||||
hLines: Array<Line>;
|
||||
vLines: Array<Line>;
|
||||
constructor(line: Line);
|
||||
get isValid(): boolean;
|
||||
get rowPivots(): Array<number>;
|
||||
get colPivots(): Array<number>;
|
||||
add(line: Line): boolean;
|
||||
private intersection;
|
||||
private getSameHorizontal;
|
||||
private getSameVertical;
|
||||
private mergeHorizontalLines;
|
||||
private mergeVerticalLines;
|
||||
normalize(): void;
|
||||
verticalExists(line: Line, y1: number, y2: number): boolean;
|
||||
horizontalExists(line: Line, x1: number, x2: number): boolean;
|
||||
private findBottomLineIndex;
|
||||
private findVerticalLineIndexs;
|
||||
private getRow;
|
||||
toData(): TableData;
|
||||
}
|
||||
|
||||
export declare type TableArray = Array<Array<string>>;
|
||||
|
||||
declare type TableCell = {
|
||||
minXY: Point;
|
||||
maxXY: Point;
|
||||
width: number;
|
||||
height: number;
|
||||
colspan?: number;
|
||||
rowspan?: number;
|
||||
text: Array<string>;
|
||||
};
|
||||
|
||||
declare class TableData {
|
||||
minXY: Point;
|
||||
maxXY: Point;
|
||||
rows: Array<TableRow>;
|
||||
private rowPivots;
|
||||
private colPivots;
|
||||
constructor(minXY: Point, maxXY: Point, rowPivots: Array<number>, colPivots: Array<number>);
|
||||
findCell(x: number, y: number): TableCell | undefined;
|
||||
get cellCount(): number;
|
||||
get rowCount(): number;
|
||||
check(): boolean;
|
||||
toArray(): string[][];
|
||||
}
|
||||
|
||||
/**
|
||||
* @public
|
||||
* TableResult
|
||||
*/
|
||||
export declare class TableResult {
|
||||
pages: Array<PageTableResult>;
|
||||
mergedTables: TableArray[];
|
||||
total: number;
|
||||
constructor(total: number);
|
||||
}
|
||||
|
||||
declare type TableRow = Array<TableCell>;
|
||||
|
||||
/**
|
||||
* @public
|
||||
* TextResult
|
||||
*/
|
||||
export declare class TextResult {
|
||||
pages: Array<PageTextResult>;
|
||||
text: string;
|
||||
total: number;
|
||||
getPageText(num: number): string;
|
||||
constructor(total: number);
|
||||
}
|
||||
|
||||
export declare type TypedArray = Int8Array | Uint8Array | Uint8ClampedArray | Int16Array | Uint16Array | Int32Array | Uint32Array | Float32Array | Float64Array;
|
||||
|
||||
/**
|
||||
* Generic wrapper for errors where the library cannot classify the cause.
|
||||
*
|
||||
* The `details` property may contain additional information provided by the
|
||||
* underlying PDF library.
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
export declare class UnknownErrorException extends Error {
|
||||
/**
|
||||
* Create a new UnknownErrorException.
|
||||
* @param message - Optional error message.
|
||||
* @param details - Optional additional details from the PDF library.
|
||||
* @param cause - Optional underlying cause.
|
||||
*/
|
||||
constructor(message?: string, details?: unknown, cause?: unknown);
|
||||
}
|
||||
|
||||
export { VerbosityLevel }
|
||||
|
||||
export { }
|
||||
28
node_modules/pdf-parse/dist/pdf-parse/cjs/pdf.worker.mjs
generated
vendored
Normal file
28
node_modules/pdf-parse/dist/pdf-parse/cjs/pdf.worker.mjs
generated
vendored
Normal file
File diff suppressed because one or more lines are too long
107
node_modules/pdf-parse/dist/pdf-parse/esm/Exception.d.ts
generated
vendored
Normal file
107
node_modules/pdf-parse/dist/pdf-parse/esm/Exception.d.ts
generated
vendored
Normal file
@@ -0,0 +1,107 @@
|
||||
/**
|
||||
* Error thrown when the parsed data is not a valid PDF document.
|
||||
*
|
||||
* Use this exception to signal that the input cannot be interpreted as a PDF
|
||||
* (corrupt file, invalid header, etc.).
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
export declare class InvalidPDFException extends Error {
|
||||
/**
|
||||
* Create a new InvalidPDFException.
|
||||
* @param message - Optional error message.
|
||||
* @param cause - Optional underlying cause (preserved on modern runtimes).
|
||||
*/
|
||||
constructor(message?: string, cause?: unknown);
|
||||
}
|
||||
/**
|
||||
* Error indicating a PDF file requires a password or the provided password is incorrect.
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
export declare class PasswordException extends Error {
|
||||
/**
|
||||
* Create a new PasswordException.
|
||||
* @param message - Optional error message.
|
||||
* @param cause - Optional underlying cause.
|
||||
*/
|
||||
constructor(message?: string, cause?: unknown);
|
||||
}
|
||||
/**
|
||||
* Error thrown when the PDF structure/contents are malformed and cannot be parsed.
|
||||
*
|
||||
* This is raised for low-level format problems detected while reading PDF objects.
|
||||
* It covers errors that occur while parsing PDF data.
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
export declare class FormatError extends Error {
|
||||
/**
|
||||
* Create a new FormatError.
|
||||
* @param message - Optional message describing the format problem.
|
||||
* @param cause - Optional underlying cause.
|
||||
*/
|
||||
constructor(message?: string, cause?: unknown);
|
||||
}
|
||||
/**
|
||||
* Generic wrapper for errors where the library cannot classify the cause.
|
||||
*
|
||||
* The `details` property may contain additional information provided by the
|
||||
* underlying PDF library.
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
export declare class UnknownErrorException extends Error {
|
||||
/**
|
||||
* Create a new UnknownErrorException.
|
||||
* @param message - Optional error message.
|
||||
* @param details - Optional additional details from the PDF library.
|
||||
* @param cause - Optional underlying cause.
|
||||
*/
|
||||
constructor(message?: string, details?: unknown, cause?: unknown);
|
||||
}
|
||||
/**
|
||||
* Represents an HTTP/network response error encountered while fetching PDF data.
|
||||
*
|
||||
* The `status` and `missing` properties mirror values that may be provided
|
||||
* by the underlying PDF library's network layer.
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
export declare class ResponseException extends Error {
|
||||
/**
|
||||
* Create a new ResponseException.
|
||||
* @param message - Optional error message.
|
||||
* @param status - Optional numeric HTTP/status code.
|
||||
* @param missing - Optional field describing missing resources.
|
||||
* @param cause - Optional underlying cause.
|
||||
*/
|
||||
constructor(message?: string, status?: number, missing?: unknown, cause?: unknown);
|
||||
}
|
||||
/**
|
||||
* Error used to indicate that an operation was aborted (for example by an AbortSignal).
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
export declare class AbortException extends Error {
|
||||
/**
|
||||
* Create a new AbortException.
|
||||
* @param message - Optional error message.
|
||||
* @param cause - Optional underlying cause.
|
||||
*/
|
||||
constructor(message?: string, cause?: unknown);
|
||||
}
|
||||
/**
|
||||
* Normalize arbitrary thrown values into an Error instance used by the library.
|
||||
*
|
||||
* Known Error instances with specific names are mapped to the library's
|
||||
* typed exceptions in order to preserve type information and any additional
|
||||
* fields (for example `details`, `status`, etc.). If the value is not an
|
||||
* Error it is converted to a generic Error containing the stringified value.
|
||||
*
|
||||
* @public
|
||||
* @param error - The thrown value to normalize.
|
||||
* @returns An Error instance representing the provided value.
|
||||
*/
|
||||
export declare function getException(error: unknown): Error;
|
||||
//# sourceMappingURL=Exception.d.ts.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/Exception.d.ts.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/Exception.d.ts.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"Exception.d.ts","sourceRoot":"","sources":["../../../src/pdf-parse/Exception.ts"],"names":[],"mappings":"AAEA;;;;;;;GAOG;AACH,qBAAa,mBAAoB,SAAQ,KAAK;IAC7C;;;;OAIG;gBACS,OAAO,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,OAAO;CAkB7C;AAED;;;;GAIG;AACH,qBAAa,iBAAkB,SAAQ,KAAK;IAC3C;;;;OAIG;gBACS,OAAO,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,OAAO;CAa7C;AAED;;;;;;;GAOG;AACH,qBAAa,WAAY,SAAQ,KAAK;IACrC;;;;OAIG;gBACS,OAAO,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,OAAO;CAa7C;AAED;;;;;;;GAOG;AACH,qBAAa,qBAAsB,SAAQ,KAAK;IAC/C;;;;;OAKG;gBACS,OAAO,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,OAAO,EAAE,KAAK,CAAC,EAAE,OAAO;CAchE;AAED;;;;;;;GAOG;AACH,qBAAa,iBAAkB,SAAQ,KAAK;IAC3C;;;;;;OAMG;gBACS,OAAO,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,OAAO,EAAE,KAAK,CAAC,EAAE,OAAO;CAejF;AAED;;;;GAIG;AACH,qBAAa,cAAe,SAAQ,KAAK;IACxC;;;;OAIG;gBACS,OAAO,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,OAAO;CAY7C;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,OAAO,GAAG,KAAK,CAyBlD"}
|
||||
217
node_modules/pdf-parse/dist/pdf-parse/esm/Exception.js
generated
vendored
Normal file
217
node_modules/pdf-parse/dist/pdf-parse/esm/Exception.js
generated
vendored
Normal file
@@ -0,0 +1,217 @@
|
||||
/* biome-ignore-all lint/suspicious/noExplicitAny: underline-type */
|
||||
/**
|
||||
* Error thrown when the parsed data is not a valid PDF document.
|
||||
*
|
||||
* Use this exception to signal that the input cannot be interpreted as a PDF
|
||||
* (corrupt file, invalid header, etc.).
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
export class InvalidPDFException extends Error {
    /**
     * Create a new InvalidPDFException.
     *
     * @param message - Optional error message; falls back to `'Invalid PDF'`
     * when it is `null` or `undefined`.
     * @param cause - Optional underlying cause. It is attached only when it is
     * not `undefined`, so an error built without a cause has no `cause`
     * property at all.
     */
    constructor(message, cause) {
        super(message ?? 'Invalid PDF');
        if (cause !== undefined) {
            // Install `cause` with the same shape the native
            // `new Error(msg, { cause })` path produces (own, writable,
            // configurable, non-enumerable). Doing it by hand keeps the cause
            // on runtimes whose Error constructor ignores the options argument.
            Object.defineProperty(this, 'cause', {
                value: cause,
                writable: true,
                enumerable: false,
                configurable: true,
            });
        }
        this.name = 'InvalidPDFException';
        // Fix the TS/ES prototype chain. `new.target` is used instead of this
        // class so that subclasses keep their own prototype and still satisfy
        // `instanceof`.
        Object.setPrototypeOf(this, new.target.prototype);
        // Preserve the native stack trace where available, hiding the
        // constructor frames themselves.
        if (typeof Error.captureStackTrace === 'function') {
            Error.captureStackTrace(this, new.target);
        }
    }
}
|
||||
/**
|
||||
* Error indicating a PDF file requires a password or the provided password is incorrect.
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
export class PasswordException extends Error {
|
||||
/**
|
||||
* Create a new PasswordException.
|
||||
* @param message - Optional error message.
|
||||
* @param cause - Optional underlying cause.
|
||||
*/
|
||||
constructor(message, cause) {
|
||||
if (cause !== undefined) {
|
||||
super(message ?? 'Password required or incorrect', { cause });
|
||||
}
|
||||
else {
|
||||
super(message ?? 'Password required or incorrect');
|
||||
}
|
||||
this.name = 'PasswordException';
|
||||
Object.setPrototypeOf(this, PasswordException.prototype);
|
||||
if (typeof Error.captureStackTrace === 'function') {
|
||||
Error.captureStackTrace(this, PasswordException);
|
||||
}
|
||||
// Fallback for older targets: if needed use (this as any).cause = cause;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Error thrown when the PDF structure/contents are malformed and cannot be parsed.
|
||||
*
|
||||
* This is raised for low-level format problems detected while reading PDF objects.
|
||||
* Errors caused during parsing PDF data.
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
export class FormatError extends Error {
|
||||
/**
|
||||
* Create a new FormatError.
|
||||
* @param message - Optional message describing the format problem.
|
||||
* @param cause - Optional underlying cause.
|
||||
*/
|
||||
constructor(message, cause) {
|
||||
if (cause !== undefined) {
|
||||
super(message ?? 'PDF format error', { cause });
|
||||
}
|
||||
else {
|
||||
super(message ?? 'PDF format error');
|
||||
}
|
||||
this.name = 'FormatError';
|
||||
Object.setPrototypeOf(this, FormatError.prototype);
|
||||
if (typeof Error.captureStackTrace === 'function') {
|
||||
Error.captureStackTrace(this, FormatError);
|
||||
}
|
||||
// Fallback for older targets: if needed use (this as any).cause = cause;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Generic wrapper for errors where the library cannot classify the cause.
|
||||
*
|
||||
* The `details` property may contain additional information provided by the
|
||||
* underlying PDF library.
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
export class UnknownErrorException extends Error {
|
||||
/**
|
||||
* Create a new UnknownErrorException.
|
||||
* @param message - Optional error message.
|
||||
* @param details - Optional additional details from the PDF library.
|
||||
* @param cause - Optional underlying cause.
|
||||
*/
|
||||
constructor(message, details, cause) {
|
||||
if (cause !== undefined) {
|
||||
super(message ?? 'Unknown error', { cause });
|
||||
}
|
||||
else {
|
||||
super(message ?? 'Unknown error');
|
||||
}
|
||||
this.name = 'UnknownErrorException';
|
||||
Object.setPrototypeOf(this, UnknownErrorException.prototype);
|
||||
if (typeof Error.captureStackTrace === 'function') {
|
||||
Error.captureStackTrace(this, UnknownErrorException);
|
||||
}
|
||||
// additional info field from pdf.mjs
|
||||
this.details = details;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Represents an HTTP/network response error encountered while fetching PDF data.
|
||||
*
|
||||
* The `status` and `missing` properties mirror values that may be provided
|
||||
* by the underlying PDF library's network layer.
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
export class ResponseException extends Error {
|
||||
/**
|
||||
* Create a new ResponseException.
|
||||
* @param message - Optional error message.
|
||||
* @param status - Optional numeric HTTP/status code.
|
||||
* @param missing - Optional field describing missing resources.
|
||||
* @param cause - Optional underlying cause.
|
||||
*/
|
||||
constructor(message, status, missing, cause) {
|
||||
if (cause !== undefined) {
|
||||
super(message ?? 'Response error', { cause });
|
||||
}
|
||||
else {
|
||||
super(message ?? 'Response error');
|
||||
}
|
||||
this.name = 'ResponseException';
|
||||
Object.setPrototypeOf(this, ResponseException.prototype);
|
||||
if (typeof Error.captureStackTrace === 'function') {
|
||||
Error.captureStackTrace(this, ResponseException);
|
||||
}
|
||||
// fields from pdf.mjs
|
||||
this.status = status;
|
||||
this.missing = missing;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Error used to indicate that an operation was aborted (for example by an AbortSignal).
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
export class AbortException extends Error {
|
||||
/**
|
||||
* Create a new AbortException.
|
||||
* @param message - Optional error message.
|
||||
* @param cause - Optional underlying cause.
|
||||
*/
|
||||
constructor(message, cause) {
|
||||
if (cause !== undefined) {
|
||||
super(message ?? 'Operation aborted', { cause });
|
||||
}
|
||||
else {
|
||||
super(message ?? 'Operation aborted');
|
||||
}
|
||||
this.name = 'AbortException';
|
||||
Object.setPrototypeOf(this, AbortException.prototype);
|
||||
if (typeof Error.captureStackTrace === 'function') {
|
||||
Error.captureStackTrace(this, AbortException);
|
||||
}
|
||||
}
|
||||
}
|
||||
/**
 * Normalize arbitrary thrown values into an Error instance used by the library.
 *
 * Error instances whose `name` matches one of the library's typed exceptions
 * are re-created as that typed exception, with the original error attached as
 * the cause and any additional fields (`details`, `status`, `missing`) copied
 * over. Any other Error instance is returned unchanged. A value that is not an
 * Error is converted to a generic Error containing the stringified value.
 *
 * @public
 * @param error - The thrown value to normalize.
 * @returns An Error instance representing the provided value.
 */
export function getException(error) {
    if (error instanceof Error) {
        // Unmapped names fall through to `default`, which preserves the
        // original error (and its stack) as-is.
        switch (error.name) {
            case 'InvalidPDFException':
                return new InvalidPDFException(error.message, error);
            case 'PasswordException':
                return new PasswordException(error.message, error);
            case 'FormatError':
                return new FormatError(error.message, error);
            case 'UnknownErrorException':
                // preserve details if present on original
                return new UnknownErrorException(error.message, error.details, error);
            case 'ResponseException':
                return new ResponseException(error.message, error.status, error.missing, error);
            case 'AbortException':
                return new AbortException(error.message, error);
            // add other mappings as needed
            default:
                return error;
        }
    }
    // Non-Error value -> convert to Error. String() itself throws for values
    // that cannot be converted to a primitive (for example an object created
    // with Object.create(null), or one whose toString throws); a normalizer
    // must not fail on its input, so fall back to the generic object tag.
    let text;
    try {
        text = String(error);
    }
    catch {
        text = Object.prototype.toString.call(error);
    }
    return new Error(text);
}
|
||||
//# sourceMappingURL=Exception.js.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/Exception.js.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/Exception.js.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"Exception.js","sourceRoot":"","sources":["../../../src/pdf-parse/Exception.ts"],"names":[],"mappings":"AAAA,oEAAoE;AAEpE;;;;;;;GAOG;AACH,MAAM,OAAO,mBAAoB,SAAQ,KAAK;IAC7C;;;;OAIG;IACH,YAAY,OAAgB,EAAE,KAAe;QAC5C,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YACzB,yDAAyD;YACzD,KAAK,CAAC,OAAO,IAAI,aAAa,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC;QAC5C,CAAC;aAAM,CAAC;YACP,KAAK,CAAC,OAAO,IAAI,aAAa,CAAC,CAAC;QACjC,CAAC;QACD,IAAI,CAAC,IAAI,GAAG,qBAAqB,CAAC;QAClC,uCAAuC;QACvC,MAAM,CAAC,cAAc,CAAC,IAAI,EAAE,mBAAmB,CAAC,SAAS,CAAC,CAAC;QAC3D,8CAA8C;QAC9C,IAAI,OAAQ,KAAa,CAAC,iBAAiB,KAAK,UAAU,EAAE,CAAC;YAC3D,KAAa,CAAC,iBAAiB,CAAC,IAAI,EAAE,mBAAmB,CAAC,CAAC;QAC7D,CAAC;QACD,0EAA0E;QAC1E,+DAA+D;QAC/D,wDAAwD;IACzD,CAAC;CACD;AAED;;;;GAIG;AACH,MAAM,OAAO,iBAAkB,SAAQ,KAAK;IAC3C;;;;OAIG;IACH,YAAY,OAAgB,EAAE,KAAe;QAC5C,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YACzB,KAAK,CAAC,OAAO,IAAI,gCAAgC,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC;QAC/D,CAAC;aAAM,CAAC;YACP,KAAK,CAAC,OAAO,IAAI,gCAAgC,CAAC,CAAC;QACpD,CAAC;QACD,IAAI,CAAC,IAAI,GAAG,mBAAmB,CAAC;QAChC,MAAM,CAAC,cAAc,CAAC,IAAI,EAAE,iBAAiB,CAAC,SAAS,CAAC,CAAC;QACzD,IAAI,OAAQ,KAAa,CAAC,iBAAiB,KAAK,UAAU,EAAE,CAAC;YAC3D,KAAa,CAAC,iBAAiB,CAAC,IAAI,EAAE,iBAAiB,CAAC,CAAC;QAC3D,CAAC;QACD,yEAAyE;IAC1E,CAAC;CACD;AAED;;;;;;;GAOG;AACH,MAAM,OAAO,WAAY,SAAQ,KAAK;IACrC;;;;OAIG;IACH,YAAY,OAAgB,EAAE,KAAe;QAC5C,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YACzB,KAAK,CAAC,OAAO,IAAI,kBAAkB,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC;QACjD,CAAC;aAAM,CAAC;YACP,KAAK,CAAC,OAAO,IAAI,kBAAkB,CAAC,CAAC;QACtC,CAAC;QACD,IAAI,CAAC,IAAI,GAAG,aAAa,CAAC;QAC1B,MAAM,CAAC,cAAc,CAAC,IAAI,EAAE,WAAW,CAAC,SAAS,CAAC,CAAC;QACnD,IAAI,OAAQ,KAAa,CAAC,iBAAiB,KAAK,UAAU,EAAE,CAAC;YAC3D,KAAa,CAAC,iBAAiB,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;QACrD,CAAC;QACD,yEAAyE;IAC1E,CAAC;CACD;AAED;;;;;;;GAOG;AACH,MAAM,OAAO,qBAAsB,SAAQ,KAAK;IAC/C;;;;;OAKG;IACH,YAAY,OAAgB,EAAE,OAAiB,EAAE,KAAe;QAC/D,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YACzB,KAAK,CAAC,OAAO,IAAI,eAAe,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC;QAC9C,CAAC;aAAM,CAAC;YACP,KAAK,CAAC,OAAO,IAAI,eAAe,CAAC,CAAC;QACnC,CAAC;QACD,IAAI,CAAC,IAAI,
GAAG,uBAAuB,CAAC;QACpC,MAAM,CAAC,cAAc,CAAC,IAAI,EAAE,qBAAqB,CAAC,SAAS,CAAC,CAAC;QAC7D,IAAI,OAAQ,KAAa,CAAC,iBAAiB,KAAK,UAAU,EAAE,CAAC;YAC3D,KAAa,CAAC,iBAAiB,CAAC,IAAI,EAAE,qBAAqB,CAAC,CAAC;QAC/D,CAAC;QACD,qCAAqC;QACpC,IAAY,CAAC,OAAO,GAAG,OAAO,CAAC;IACjC,CAAC;CACD;AAED;;;;;;;GAOG;AACH,MAAM,OAAO,iBAAkB,SAAQ,KAAK;IAC3C;;;;;;OAMG;IACH,YAAY,OAAgB,EAAE,MAAe,EAAE,OAAiB,EAAE,KAAe;QAChF,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YACzB,KAAK,CAAC,OAAO,IAAI,gBAAgB,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC;QAC/C,CAAC;aAAM,CAAC;YACP,KAAK,CAAC,OAAO,IAAI,gBAAgB,CAAC,CAAC;QACpC,CAAC;QACD,IAAI,CAAC,IAAI,GAAG,mBAAmB,CAAC;QAChC,MAAM,CAAC,cAAc,CAAC,IAAI,EAAE,iBAAiB,CAAC,SAAS,CAAC,CAAC;QACzD,IAAI,OAAQ,KAAa,CAAC,iBAAiB,KAAK,UAAU,EAAE,CAAC;YAC3D,KAAa,CAAC,iBAAiB,CAAC,IAAI,EAAE,iBAAiB,CAAC,CAAC;QAC3D,CAAC;QACD,sBAAsB;QACrB,IAAY,CAAC,MAAM,GAAG,MAAM,CAAC;QAC7B,IAAY,CAAC,OAAO,GAAG,OAAO,CAAC;IACjC,CAAC;CACD;AAED;;;;GAIG;AACH,MAAM,OAAO,cAAe,SAAQ,KAAK;IACxC;;;;OAIG;IACH,YAAY,OAAgB,EAAE,KAAe;QAC5C,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YACzB,KAAK,CAAC,OAAO,IAAI,mBAAmB,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC;QAClD,CAAC;aAAM,CAAC;YACP,KAAK,CAAC,OAAO,IAAI,mBAAmB,CAAC,CAAC;QACvC,CAAC;QACD,IAAI,CAAC,IAAI,GAAG,gBAAgB,CAAC;QAC7B,MAAM,CAAC,cAAc,CAAC,IAAI,EAAE,cAAc,CAAC,SAAS,CAAC,CAAC;QACtD,IAAI,OAAQ,KAAa,CAAC,iBAAiB,KAAK,UAAU,EAAE,CAAC;YAC3D,KAAa,CAAC,iBAAiB,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC;QACxD,CAAC;IACF,CAAC;CACD;AAED;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,YAAY,CAAC,KAAc;IAC1C,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;QAC5B,qDAAqD;QACrD,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACpB,KAAK,qBAAqB;gBACzB,OAAO,IAAI,mBAAmB,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;YACtD,KAAK,mBAAmB;gBACvB,OAAO,IAAI,iBAAiB,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;YACpD,KAAK,aAAa;gBACjB,OAAO,IAAI,WAAW,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;YAC9C,KAAK,uBAAuB;gBAC3B,0CAA0C;gBAC1C,OAAO,IAAI,qBAAqB,CAAC,KAAK,CAAC,OAAO,EAAG,KAAa,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;YAChF,KAAK,mBAAmB;gBACvB,OAAO,IAAI,iBAAiB,CAAC,KAAK,CAAC,OAAO,EAAG,KAAa,CAAC,MAAM,EAAG,KAAa,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;YACnG,KAAK,gBAAgB;gBACpB,OAAO,IAA
I,cAAc,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;YACjD,+BAA+B;YAC/B;gBACC,OAAO,KAAK,CAAC;QACf,CAAC;IACF,CAAC;IAED,sCAAsC;IACtC,OAAO,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;AACjC,CAAC"}
|
||||
55
node_modules/pdf-parse/dist/pdf-parse/esm/ImageResult.d.ts
generated
vendored
Normal file
55
node_modules/pdf-parse/dist/pdf-parse/esm/ImageResult.d.ts
generated
vendored
Normal file
@@ -0,0 +1,55 @@
|
||||
import type { ImageKind } from 'pdfjs-dist/legacy/build/pdf.mjs';
|
||||
/**
 * ImageKindKey
 *
 * Union of the member names of the `ImageKind` object imported from PDF.js
 * (e.g. "GRAYSCALE_1BPP", "RGB_24BPP", "RGBA_32BPP").
 *
 * @public
 */
|
||||
export type ImageKindKey = keyof typeof ImageKind;
|
||||
/**
 * ImageKindValue
 *
 * Union of the value types stored under each `ImageKindKey` of the PDF.js
 * `ImageKind` object, i.e. the numeric codes of the enum (e.g. 1, 2, 3).
 *
 * @public
 */
|
||||
export type ImageKindValue = (typeof ImageKind)[ImageKindKey];
|
||||
/**
 * ImageResult
 *
 * Helper container for extracted images grouped per page.
 *
 * @public
 */
|
||||
export declare class ImageResult {
|
||||
/** Extracted images, grouped per page. */
pages: Array<PageImages>;
|
||||
/**
 * Value passed to the constructor.
 * NOTE(review): presumably the document's page count - confirm against the caller.
 */
total: number;
|
||||
/**
 * Look up a single image.
 * @param num - Page number to match against `PageImages.pageNumber`.
 * @param name - Image name to match against `EmbeddedImage.name`.
 * @returns The first matching image, or `null` when no page/image matches.
 */
getPageImage(num: number, name: string): EmbeddedImage | null;
|
||||
/**
 * @param total - Stored as `total`.
 */
constructor(total: number);
|
||||
}
|
||||
/**
 * PageImages
 *
 * Represents all embedded images found on a single PDF page.
 *
 * @public
 */
|
||||
export interface PageImages {
|
||||
/** 1-based page index. */
pageNumber: number;
|
||||
/** The `EmbeddedImage` objects found on this page. */
images: EmbeddedImage[];
|
||||
}
|
||||
/**
 * EmbeddedImage
 *
 * Normalized representation of an embedded image extracted from the PDF.
 * Both `data` and `dataUrl` are carried so consumers can choose the most
 * convenient form; consider dropping one of them to save memory.
 *
 * @public
 */
|
||||
export interface EmbeddedImage {
|
||||
/** Raw image bytes (e.g. PNG/JPEG). Use this for file writing or binary processing. */
data: Uint8Array;
|
||||
/**
 * Data URL (e.g. "data:image/png;base64,...") for embedding directly in an <img> src.
 * NOTE(review): previously documented as "optional", but the property is declared
 * as required here - confirm which is intended.
 */
dataUrl: string;
|
||||
/** Resource name for the image. */
name: string;
|
||||
/** Width in pixels. */
width: number;
|
||||
/** Height in pixels. */
height: number;
|
||||
/** `ImageKindValue` indicating the pixel format (e.g. GRAYSCALE_1BPP / RGB_24BPP / RGBA_32BPP). */
kind: ImageKindValue;
|
||||
}
|
||||
//# sourceMappingURL=ImageResult.d.ts.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/ImageResult.d.ts.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/ImageResult.d.ts.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"ImageResult.d.ts","sourceRoot":"","sources":["../../../src/pdf-parse/ImageResult.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iCAAiC,CAAC;AAEjE;;;;GAIG;AACH,MAAM,MAAM,YAAY,GAAG,MAAM,OAAO,SAAS,CAAC;AAElD;;;;GAIG;AACH,MAAM,MAAM,cAAc,GAAG,CAAC,OAAO,SAAS,CAAC,CAAC,YAAY,CAAC,CAAC;AAE9D;;;;GAIG;AACH,qBAAa,WAAW;IACvB,KAAK,EAAE,KAAK,CAAC,UAAU,CAAC,CAAM;IAC9B,KAAK,EAAE,MAAM,CAAK;IAEX,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,aAAa,GAAG,IAAI;gBAaxD,KAAK,EAAE,MAAM;CAGzB;AAED;;;;;;GAMG;AACH,MAAM,WAAW,UAAU;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,aAAa,EAAE,CAAC;CACxB;AAED;;;;;;;;;;GAUG;AACH,MAAM,WAAW,aAAa;IAE7B,IAAI,EAAE,UAAU,CAAC;IAGjB,OAAO,EAAE,MAAM,CAAC;IAGhB,IAAI,EAAE,MAAM,CAAC;IAGb,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IAGf,IAAI,EAAE,cAAc,CAAC;CACrB"}
|
||||
25
node_modules/pdf-parse/dist/pdf-parse/esm/ImageResult.js
generated
vendored
Normal file
25
node_modules/pdf-parse/dist/pdf-parse/esm/ImageResult.js
generated
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
/**
|
||||
* @public
|
||||
* ImageResult
|
||||
* Helper container for extracted images grouped per page.
|
||||
*/
|
||||
export class ImageResult {
|
||||
pages = [];
|
||||
total = 0;
|
||||
getPageImage(num, name) {
|
||||
for (const pageData of this.pages) {
|
||||
if (pageData.pageNumber === num) {
|
||||
for (const img of pageData.images) {
|
||||
if (img.name === name) {
|
||||
return img;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
constructor(total) {
|
||||
this.total = total;
|
||||
}
|
||||
}
|
||||
//# sourceMappingURL=ImageResult.js.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/ImageResult.js.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/ImageResult.js.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"ImageResult.js","sourceRoot":"","sources":["../../../src/pdf-parse/ImageResult.ts"],"names":[],"mappings":"AAgBA;;;;GAIG;AACH,MAAM,OAAO,WAAW;IACvB,KAAK,GAAsB,EAAE,CAAC;IAC9B,KAAK,GAAW,CAAC,CAAC;IAEX,YAAY,CAAC,GAAW,EAAE,IAAY;QAC5C,KAAK,MAAM,QAAQ,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACnC,IAAI,QAAQ,CAAC,UAAU,KAAK,GAAG,EAAE,CAAC;gBACjC,KAAK,MAAM,GAAG,IAAI,QAAQ,CAAC,MAAM,EAAE,CAAC;oBACnC,IAAI,GAAG,CAAC,IAAI,KAAK,IAAI,EAAE,CAAC;wBACvB,OAAO,GAAG,CAAC;oBACZ,CAAC;gBACF,CAAC;YACF,CAAC;QACF,CAAC;QACD,OAAO,IAAI,CAAC;IACb,CAAC;IAED,YAAY,KAAa;QACxB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACpB,CAAC;CACD"}
|
||||
104
node_modules/pdf-parse/dist/pdf-parse/esm/InfoResult.d.ts
generated
vendored
Normal file
104
node_modules/pdf-parse/dist/pdf-parse/esm/InfoResult.d.ts
generated
vendored
Normal file
@@ -0,0 +1,104 @@
|
||||
import type { Metadata } from 'pdfjs-dist/types/src/display/metadata.js';
|
||||
export type { Metadata } from 'pdfjs-dist/types/src/display/metadata.js';
|
||||
/**
 * Node representing a single item in the PDF outline (bookmarks).
 * This mirrors the structure returned by PDF.js' getOutline() API.
 *
 * @public
 */
|
||||
export interface OutlineNode {
|
||||
title: string;
|
||||
bold: boolean;
|
||||
italic: boolean;
|
||||
color: Uint8ClampedArray;
|
||||
dest: string | Array<any> | null;
|
||||
url: string | null;
|
||||
unsafeUrl?: string;
|
||||
newWindow?: boolean;
|
||||
count?: number;
|
||||
/**
 * NOTE(review): typed as `Array<any>`; presumably the nested child outline
 * nodes - confirm against the PDF.js getOutline() typings.
 */
items: Array<any>;
|
||||
}
|
||||
/**
 * Consolidated date information gathered from different PDF sources.
 *
 * The PDF 'Info' dictionary contains CreationDate / ModDate, and the XMP/XAP
 * metadata can contain several timestamps as well. This structure collects
 * those values (if present) as JavaScript Date objects. The type also admits
 * `null` for a property that exists but cannot be parsed; note that
 * InfoResult.getDateNode() in this package sets the Xmp and Xap fields to
 * `undefined` (not `null`) when the value is absent or unparsable.
 *
 * @public
 */
|
||||
export type DateNode = {
|
||||
/** From the Info dictionary entry `CreationDate`. */
CreationDate?: Date | null;
|
||||
/** From the Info dictionary entry `ModDate`. */
ModDate?: Date | null;
|
||||
/** From the metadata property `xmp:createdate`. */
XmpCreateDate?: Date | null;
|
||||
/** From the metadata property `xmp:modifydate`. */
XmpModifyDate?: Date | null;
|
||||
/** From the metadata property `xmp:metadatadate`. */
XmpMetadataDate?: Date | null;
|
||||
/** From the metadata property `xap:createdate`. */
XapCreateDate?: Date | null;
|
||||
/** From the metadata property `xap:modifydate`. */
XapModifyDate?: Date | null;
|
||||
/** From the metadata property `xap:metadatadate`. */
XapMetadataDate?: Date | null;
|
||||
};
|
||||
/**
 * Per-page link extraction result.
 *
 * @public
 */
|
||||
export type PageLinkResult = {
|
||||
/** The physical page index (1-based) within the PDF document. */
pageNumber: number;
|
||||
/**
 * Optional printed page label shown by PDF viewers (e.g. "iii", "1", "A-1").
 * This can differ from the physical page number and may be undefined when
 * the document does not provide labels.
 */
pageLabel?: string | null;
|
||||
/** Text -> URL mappings that were found/overlaid on the page. */
links: Array<{
|
||||
text: string;
|
||||
url: string;
|
||||
}>;
|
||||
/** Page width in PDF units for the viewport used. */
width: number;
|
||||
/** Page height in PDF units for the viewport used. */
height: number;
|
||||
};
|
||||
/**
 * Aggregated information about a PDF document returned by getInfo().
 * The object contains high-level metadata, outline/bookmark structure,
 * per-page extracted hyperlinks and utility helpers for parsing dates.
 *
 * @public
 */
|
||||
export declare class InfoResult {
|
||||
/** Total number of pages in the PDF document (count of physical pages). */
total: number;
|
||||
/**
 * The PDF 'Info' dictionary. Typical fields include title, author, subject,
 * Creator, Producer and Creation/Modification dates. The exact structure is
 * determined by the PDF and as returned by PDF.js.
 */
|
||||
info?: any;
|
||||
/**
 * Low-level document metadata object (XMP). Use this to access extended
 * properties that are not present in the Info dictionary.
 */
metadata?: Metadata;
|
||||
/**
 * An array of document fingerprint strings provided by PDF.js. Useful
 * for caching, de-duplication or identifying a document across runs.
 */
|
||||
fingerprints?: Array<string | null>;
|
||||
/**
 * Permission flags for the document as returned by PDF.js (or null).
 * These flags indicate capabilities such as printing, copying and
 * other restrictions imposed by the PDF security settings.
 */
|
||||
permission?: number[] | null;
|
||||
/**
 * Optional document outline (bookmarks). When present this is the
 * hierarchical navigation structure which viewers use for quick access.
 */
|
||||
outline?: Array<OutlineNode> | null;
|
||||
/** Results of the per-page hyperlink extraction. Empty array by default. */
pages: Array<PageLinkResult>;
|
||||
/**
 * Collects dates from different sources (Info dictionary and XMP/XAP metadata)
 * and returns them as a DateNode where available. This helps callers compare
 * and choose the most relevant timestamp (for example a creation date vs XMP date).
 */
|
||||
getDateNode(): DateNode;
|
||||
/**
 * Try to parse an ISO-8601 date string from XMP/XAP metadata. If the
 * value is falsy or cannot be parsed, undefined is returned to indicate
 * absence or unparsable input.
 */
|
||||
private parseISODateString;
|
||||
/**
 * @param total - Stored as `total`.
 */
constructor(total: number);
|
||||
}
|
||||
//# sourceMappingURL=InfoResult.d.ts.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/InfoResult.d.ts.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/InfoResult.d.ts.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"InfoResult.d.ts","sourceRoot":"","sources":["../../../src/pdf-parse/InfoResult.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,0CAA0C,CAAC;AAWzE,YAAY,EAAE,QAAQ,EAAE,MAAM,0CAA0C,CAAC;AAEzE;;;;GAIG;AACH,MAAM,WAAW,WAAW;IAE3B,KAAK,EAAE,MAAM,CAAC;IAGd,IAAI,EAAE,OAAO,CAAC;IAGd,MAAM,EAAE,OAAO,CAAC;IAGhB,KAAK,EAAE,iBAAiB,CAAC;IAMzB,IAAI,EAAE,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC;IAGjC,GAAG,EAAE,MAAM,GAAG,IAAI,CAAC;IAGnB,SAAS,CAAC,EAAE,MAAM,CAAC;IAGnB,SAAS,CAAC,EAAE,OAAO,CAAC;IAGpB,KAAK,CAAC,EAAE,MAAM,CAAC;IAIf,KAAK,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC;CAClB;AAED;;;;;;;GAOG;AACH,MAAM,MAAM,QAAQ,GAAG;IACtB,YAAY,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC;IAC3B,OAAO,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC;IACtB,aAAa,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC;IAC5B,aAAa,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC;IAC5B,eAAe,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC;IAC9B,aAAa,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC;IAC5B,aAAa,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC;IAC5B,eAAe,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC;CAC9B,CAAC;AAEF;;;;;;;;;GASG;AACH,MAAM,MAAM,cAAc,GAAG;IAE5B,UAAU,EAAE,MAAM,CAAC;IAInB,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAI1B,KAAK,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,GAAG,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAG5C,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;CACf,CAAC;AAEF;;;;;GAKG;AACH,qBAAa,UAAU;IAEtB,KAAK,EAAE,MAAM,CAAC;IAEd;;;;OAIG;IAEH,IAAI,CAAC,EAAE,GAAG,CAAC;IAIX,QAAQ,CAAC,EAAE,QAAQ,CAAC;IAEpB;;;OAGG;IACH,YAAY,CAAC,EAAE,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;IAEpC;;;;OAIG;IACH,UAAU,CAAC,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;IAE7B;;;OAGG;IACH,OAAO,CAAC,EAAE,KAAK,CAAC,WAAW,CAAC,GAAG,IAAI,CAAC;IAGpC,KAAK,EAAE,KAAK,CAAC,cAAc,CAAC,CAAM;IAElC;;;;OAIG;IACI,WAAW,IAAI,QAAQ;IAuD9B;;;;OAIG;IACH,OAAO,CAAC,kBAAkB;gBAWd,KAAK,EAAE,MAAM;CAGzB"}
|
||||
116
node_modules/pdf-parse/dist/pdf-parse/esm/InfoResult.js
generated
vendored
Normal file
116
node_modules/pdf-parse/dist/pdf-parse/esm/InfoResult.js
generated
vendored
Normal file
@@ -0,0 +1,116 @@
|
||||
import * as pdfjs from 'pdfjs-dist/legacy/build/pdf.mjs';
|
||||
// Names of the metadata date properties read by InfoResult.getDateNode().
// Expands to, in this exact order (getDateNode relies on the positions):
//   'xmp:createdate', 'xmp:modifydate', 'xmp:metadatadate',
//   'xap:createdate', 'xap:modifydate', 'xap:metadatadate'
const XMP_DATE_PROPERTIES = ['xmp', 'xap'].flatMap((prefix) => ['createdate', 'modifydate', 'metadatadate'].map((suffix) => `${prefix}:${suffix}`));
|
||||
/**
|
||||
* @public
|
||||
* Aggregated information about a PDF document returned by getInfo().
|
||||
* The object contains high-level metadata, outline/bookmark structure,
|
||||
* per-page extracted hyperlinks and utility helpers for parsing dates.
|
||||
*/
|
||||
export class InfoResult {
|
||||
// Total number of pages in the PDF document (count of physical pages).
|
||||
total;
|
||||
/**
|
||||
* The PDF 'Info' dictionary. Typical fields include title, author, subject,
|
||||
* Creator, Producer and Creation/Modification dates. The exact structure is
|
||||
* determined by the PDF and as returned by PDF.js.
|
||||
*/
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <unsupported underline type>
|
||||
info;
|
||||
// Low-level document metadata object (XMP). Use this to access extended
|
||||
// properties that are not present in the Info dictionary.
|
||||
metadata;
|
||||
/**
|
||||
* An array of document fingerprint strings provided by PDF.js. Useful
|
||||
* for caching, de-duplication or identifying a document across runs.
|
||||
*/
|
||||
fingerprints;
|
||||
/**
|
||||
* Permission flags for the document as returned by PDF.js (or null).
|
||||
* These flags indicate capabilities such as printing, copying and
|
||||
* other restrictions imposed by the PDF security settings.
|
||||
*/
|
||||
permission;
|
||||
/**
|
||||
* Optional document outline (bookmarks). When present this is the
|
||||
* hierarchical navigation structure which viewers use for quick access.
|
||||
*/
|
||||
outline;
|
||||
// Results with per-page hyperlink extraction. Empty array by default.
|
||||
pages = [];
|
||||
/**
|
||||
* Collects dates from different sources (Info dictionary and XMP/XAP metadata)
|
||||
* and returns them as a DateNode where available. This helps callers compare
|
||||
* and choose the most relevant timestamp (for example a creation date vs XMP date).
|
||||
*/
|
||||
getDateNode() {
|
||||
const result = {};
|
||||
// The Info dictionary may contain CreationDate/ModDate in PDF date string format.
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <unsupported underline type>
|
||||
const CreationDate = this.info?.CreationDate;
|
||||
if (CreationDate) {
|
||||
result.CreationDate = pdfjs.PDFDateString.toDateObject(CreationDate);
|
||||
}
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <unsupported underline type>
|
||||
const ModDate = this.info?.ModDate;
|
||||
if (ModDate) {
|
||||
result.ModDate = pdfjs.PDFDateString.toDateObject(ModDate);
|
||||
}
|
||||
// If no XMP metadata is present, return the Info-based dates only.
|
||||
if (!this.metadata) {
|
||||
return result;
|
||||
}
|
||||
// Extract several XMP/XAP date properties (if present) and attempt to
|
||||
// parse them as ISO-like strings. Parsed values are added to the
|
||||
// corresponding DateNode fields.
|
||||
for (const prop of XMP_DATE_PROPERTIES) {
|
||||
const value = this.metadata?.get(prop);
|
||||
const date = this.parseISODateString(value);
|
||||
switch (prop) {
|
||||
case XMP_DATE_PROPERTIES[0]:
|
||||
result.XmpCreateDate = date;
|
||||
break;
|
||||
case XMP_DATE_PROPERTIES[1]:
|
||||
result.XmpModifyDate = date;
|
||||
break;
|
||||
case XMP_DATE_PROPERTIES[2]:
|
||||
result.XmpMetadataDate = date;
|
||||
break;
|
||||
case XMP_DATE_PROPERTIES[3]:
|
||||
result.XapCreateDate = date;
|
||||
break;
|
||||
case XMP_DATE_PROPERTIES[4]:
|
||||
result.XapModifyDate = date;
|
||||
break;
|
||||
case XMP_DATE_PROPERTIES[5]:
|
||||
result.XapMetadataDate = date;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
/**
|
||||
* Try to parse an ISO-8601 date string from XMP/XAP metadata. If the
|
||||
* value is falsy or cannot be parsed, undefined is returned to indicate
|
||||
* absence or unparsable input.
|
||||
*/
|
||||
parseISODateString(isoDateString) {
|
||||
if (!isoDateString)
|
||||
return undefined;
|
||||
const parsedDate = Date.parse(isoDateString);
|
||||
if (!Number.isNaN(parsedDate)) {
|
||||
return new Date(parsedDate);
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
constructor(total) {
|
||||
this.total = total;
|
||||
}
|
||||
}
|
||||
//# sourceMappingURL=InfoResult.js.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/InfoResult.js.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/InfoResult.js.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"InfoResult.js","sourceRoot":"","sources":["../../../src/pdf-parse/InfoResult.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,KAAK,MAAM,iCAAiC,CAAC;AAGzD,MAAM,mBAAmB,GAAG;IAC3B,gBAAgB;IAChB,gBAAgB;IAChB,kBAAkB;IAClB,gBAAgB;IAChB,gBAAgB;IAChB,kBAAkB;CAClB,CAAC;AA2FF;;;;;GAKG;AACH,MAAM,OAAO,UAAU;IACtB,uEAAuE;IACvE,KAAK,CAAS;IAEd;;;;OAIG;IACH,2EAA2E;IAC3E,IAAI,CAAO;IAEX,wEAAwE;IACxE,0DAA0D;IAC1D,QAAQ,CAAY;IAEpB;;;OAGG;IACH,YAAY,CAAwB;IAEpC;;;;OAIG;IACH,UAAU,CAAmB;IAE7B;;;OAGG;IACH,OAAO,CAA6B;IAEpC,sEAAsE;IACtE,KAAK,GAA0B,EAAE,CAAC;IAElC;;;;OAIG;IACI,WAAW;QACjB,MAAM,MAAM,GAAa,EAAE,CAAC;QAE5B,kFAAkF;QAClF,2EAA2E;QAC3E,MAAM,YAAY,GAAI,IAAI,CAAC,IAAY,EAAE,YAAY,CAAC;QAEtD,IAAI,YAAY,EAAE,CAAC;YAClB,MAAM,CAAC,YAAY,GAAG,KAAK,CAAC,aAAa,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC;QACtE,CAAC;QAED,2EAA2E;QAC3E,MAAM,OAAO,GAAI,IAAI,CAAC,IAAY,EAAE,OAAO,CAAC;QAE5C,IAAI,OAAO,EAAE,CAAC;YACb,MAAM,CAAC,OAAO,GAAG,KAAK,CAAC,aAAa,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC;QAC5D,CAAC;QAED,mEAAmE;QACnE,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACpB,OAAO,MAAM,CAAC;QACf,CAAC;QAED,sEAAsE;QACtE,iEAAiE;QACjE,iCAAiC;QACjC,KAAK,MAAM,IAAI,IAAI,mBAAmB,EAAE,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,CAAC,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;YACvC,MAAM,IAAI,GAAG,IAAI,CAAC,kBAAkB,CAAC,KAAK,CAAC,CAAC;YAE5C,QAAQ,IAAI,EAAE,CAAC;gBACd,KAAK,mBAAmB,CAAC,CAAC,CAAC;oBAC1B,MAAM,CAAC,aAAa,GAAG,IAAI,CAAC;oBAC5B,MAAM;gBACP,KAAK,mBAAmB,CAAC,CAAC,CAAC;oBAC1B,MAAM,CAAC,aAAa,GAAG,IAAI,CAAC;oBAC5B,MAAM;gBACP,KAAK,mBAAmB,CAAC,CAAC,CAAC;oBAC1B,MAAM,CAAC,eAAe,GAAG,IAAI,CAAC;oBAC9B,MAAM;gBACP,KAAK,mBAAmB,CAAC,CAAC,CAAC;oBAC1B,MAAM,CAAC,aAAa,GAAG,IAAI,CAAC;oBAC5B,MAAM;gBACP,KAAK,mBAAmB,CAAC,CAAC,CAAC;oBAC1B,MAAM,CAAC,aAAa,GAAG,IAAI,CAAC;oBAC5B,MAAM;gBACP,KAAK,mBAAmB,CAAC,CAAC,CAAC;oBAC1B,MAAM,CAAC,eAAe,GAAG,IAAI,CAAC;oBAC9B,MAAM;YACR,CAAC;QACF,CAAC;QAED,OAAO,MAAM,CAAC;IACf,CAAC;IAED;;;;OAIG;IACK,kBAAkB,CAAC,aAAqB;QAC/C,IAAI,CAAC,aAAa;YAAE,OAAO,SAAS,CAAC;QAErC,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;QAC7C,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,UAAU,CAAC,EAAE,
CAAC;YAC/B,OAAO,IAAI,IAAI,CAAC,UAAU,CAAC,CAAC;QAC7B,CAAC;QAED,OAAO,SAAS,CAAC;IAClB,CAAC;IAED,YAAY,KAAa;QACxB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACpB,CAAC;CACD"}
|
||||
212
node_modules/pdf-parse/dist/pdf-parse/esm/LoadParameters.d.ts
generated
vendored
Normal file
212
node_modules/pdf-parse/dist/pdf-parse/esm/LoadParameters.d.ts
generated
vendored
Normal file
@@ -0,0 +1,212 @@
|
||||
/** biome-ignore-all lint/complexity/noBannedTypes: for underline types */
|
||||
import type { DocumentInitParameters, PDFDataRangeTransport, PDFWorker } from 'pdfjs-dist/types/src/display/api.js';
|
||||
export type { PDFDataRangeTransport, PDFWorker };
|
||||
/**
 * @public
 * LoadParameters
 * Options accepted when loading a PDF; extends the pdf.js `DocumentInitParameters`.
 */
export interface LoadParameters extends DocumentInitParameters {
    /** URL of the PDF. Default: `undefined`. */
    url?: string | URL | undefined;
    /**
     * Binary PDF data. Prefer TypedArrays (e.g. `Uint8Array`) for lower memory usage; BASE64-encoded
     * data should first be converted to a binary string with `atob()`.
     * **NOTE**: TypedArrays are generally transferred to the worker thread, which reduces main-thread
     * memory usage but takes ownership of the array. Default: `undefined`.
     */
    data?: string | number[] | ArrayBuffer | TypedArray | undefined;
    /** Basic authentication headers. Default: `undefined`. */
    httpHeaders?: Object | undefined;
    /** Whether cross-site Access-Control requests use credentials (e.g. cookies or auth headers). Default: `false`. */
    withCredentials?: boolean | undefined;
    /** Password used to decrypt password-protected PDFs. Default: `undefined`. */
    password?: string | undefined;
    /** Length of the PDF file; used for progress reports and range requests. Default: `undefined`. */
    length?: number | undefined;
    /** Custom range transport implementation. Default: `undefined`. */
    range?: PDFDataRangeTransport | undefined;
    /** Maximum number of bytes fetched per range request. Default: `65536` (`2^16`). */
    rangeChunkSize?: number | undefined;
    /** Worker used for loading and parsing PDF data. Default: `undefined`. */
    worker?: PDFWorker | undefined;
    /** Logging level; use constants from `VerbosityLevel`. Default: `undefined`. */
    verbosity?: number | undefined;
    /** Document base URL, used to resolve relative URLs in annotations and outline items. Default: `undefined`. */
    docBaseUrl?: string | undefined;
    /** Location of the predefined Adobe CMaps; include the trailing slash. Default: `undefined`. */
    cMapUrl?: string | undefined;
    /** Whether the Adobe CMaps are binary-packed. Default: `true`. */
    cMapPacked?: boolean | undefined;
    /** Factory for reading built-in CMap files. Default: `{DOMCMapReaderFactory}`. */
    CMapReaderFactory?: Object | undefined;
    /** Location of the predefined ICC profiles; include the trailing slash. Default: `undefined`. */
    iccUrl?: string | undefined;
    /**
     * When `true`, non-embedded fonts fall back to system fonts.
     * Default: `true` in browsers, `false` in Node.js (unless `disableFontFace === true`, then always `false`).
     */
    useSystemFonts?: boolean | undefined;
    /** Location of the standard font files; include the trailing slash. Default: `undefined`. */
    standardFontDataUrl?: string | undefined;
    /** Factory for reading standard font files. Default: `{DOMStandardFontDataFactory}`. */
    StandardFontDataFactory?: Object | undefined;
    /** Location of the WebAssembly files; include the trailing slash. Default: `undefined`. */
    wasmUrl?: string | undefined;
    /** Factory for reading WASM files. Default: `{DOMWasmFactory}`. */
    WasmFactory?: Object | undefined;
    /**
     * Enable `fetch()` in the worker thread for CMap/font/WASM files; when `true` the factory options are ignored.
     * Default: `true` in browsers, `false` in Node.js.
     */
    useWorkerFetch?: boolean | undefined;
    /** Try to use WebAssembly for better performance (e.g. image decoding). Default: `true`. */
    useWasm?: boolean | undefined;
    /** Reject promises (e.g. `getTextContent`) on parse errors instead of recovering partially. Default: `false`. */
    stopAtErrors?: boolean | undefined;
    /** Maximum image size in total pixels (`width * height`); `-1` means no limit. Default: `-1`. */
    maxImageSize?: number | undefined;
    /** Whether evaluating strings as JS is allowed (for PDF function performance). Default: `true`. */
    isEvalSupported?: boolean | undefined;
    /** Whether `OffscreenCanvas` can be used in the worker. Default: `true` in browsers, `false` in Node.js. */
    isOffscreenCanvasSupported?: boolean | undefined;
    /**
     * Whether `ImageDecoder` can be used in the worker. Default: `true` in browsers, `false` in Node.js.
     * **NOTE**: Temporarily disabled in Chromium due to bugs:
     * - Crashes with BMP decoder on huge images ([issue 374807001](https://issues.chromium.org/issues/374807001))
     * - Broken JPEGs with custom color profiles ([issue 378869810](https://issues.chromium.org/issues/378869810))
     */
    isImageDecoderSupported?: boolean | undefined;
    /**
     * Used to decide when images are resized (via `OffscreenCanvas`); `-1` selects a slower fallback algorithm.
     * Default: `undefined`.
     */
    canvasMaxAreaInBytes?: number | undefined;
    /**
     * Disable `@font-face`/Font Loading API and use the built-in glyph renderer instead.
     * Default: `false` in browsers, `true` in Node.js.
     */
    disableFontFace?: boolean | undefined;
    /**
     * Include extra (non-rendering) font properties when exporting font data from the worker.
     * Increases memory usage. Default: `false`.
     */
    fontExtraProperties?: boolean | undefined;
    /** Render XFA forms if present. Default: `false`. */
    enableXfa?: boolean | undefined;
    /**
     * Explicit document context for creating elements and loading resources; defaults to the current document.
     * Default: `undefined`.
     */
    ownerDocument?: HTMLDocument | undefined;
    /** Disable range requests for PDF loading. Default: `false`. */
    disableRange?: boolean | undefined;
    /** Disable streaming of PDF data. Default: `false`. */
    disableStream?: boolean | undefined;
    /** Disable pre-fetching of PDF data; requires `disableStream: true` to work fully. Default: `false`. */
    disableAutoFetch?: boolean | undefined;
    /** Enable debugging hooks (see `web/debugger.js`). Default: `false`. */
    pdfBug?: boolean | undefined;
    /** Factory for creating canvases. Default: `{DOMCanvasFactory}`. */
    CanvasFactory?: Object | undefined;
    /** Factory for creating SVG filters during rendering. Default: `{DOMFilterFactory}`. */
    FilterFactory?: Object | undefined;
    /** Enable hardware acceleration for rendering. Default: `false`. */
    enableHWA?: boolean | undefined;
}
|
||||
/** Union of the typed-array views accepted as binary PDF data. */
export type TypedArray =
    | Int8Array
    | Uint8Array
    | Uint8ClampedArray
    | Int16Array
    | Uint16Array
    | Int32Array
    | Uint32Array
    | Float32Array
    | Float64Array;
|
||||
//# sourceMappingURL=LoadParameters.d.ts.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/LoadParameters.d.ts.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/LoadParameters.d.ts.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"LoadParameters.d.ts","sourceRoot":"","sources":["../../../src/pdf-parse/LoadParameters.ts"],"names":[],"mappings":"AAAA,0EAA0E;AAE1E,OAAO,KAAK,EAAE,sBAAsB,EAAE,qBAAqB,EAAE,SAAS,EAAE,MAAM,qCAAqC,CAAC;AAEpH,YAAY,EAAE,qBAAqB,EAAE,SAAS,EAAE,CAAC;AAEjD;;;;GAIG;AACH,MAAM,WAAW,cAAe,SAAQ,sBAAsB;IAC7D;;;OAGG;IACH,GAAG,CAAC,EAAE,MAAM,GAAG,GAAG,GAAG,SAAS,CAAC;IAC/B;;;;;OAKG;IACH,IAAI,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,WAAW,GAAG,UAAU,GAAG,SAAS,CAAC;IAChE;;;OAGG;IACH,WAAW,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IACjC;;;OAGG;IACH,eAAe,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACtC;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC9B;;;OAGG;IACH,MAAM,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B;;;OAGG;IACH,KAAK,CAAC,EAAE,qBAAqB,GAAG,SAAS,CAAC;IAC1C;;;OAGG;IACH,cAAc,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IACpC;;;OAGG;IACH,MAAM,CAAC,EAAE,SAAS,GAAG,SAAS,CAAC;IAC/B;;;OAGG;IACH,SAAS,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B;;;OAGG;IACH,UAAU,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAChC;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7B;;;OAGG;IACH,UAAU,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACjC;;;OAGG;IACH,iBAAiB,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IACvC;;;OAGG;IACH,MAAM,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B;;;OAGG;IACH,cAAc,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACrC;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IACzC;;;OAGG;IACH,uBAAuB,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7C;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7B;;;OAGG;IACH,WAAW,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IACjC;;;OAGG;IACH,cAAc,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACrC;;;OAGG;IACH,OAAO,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IAC9B;;;OAGG;IACH,YAAY,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACnC;;;OAGG;IACH,YAAY,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAClC;;;OAGG;IACH,eAAe,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACtC;;;OAGG;IACH,0BAA0B,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACjD;;;;;;OAMG;IACH,uBAAuB,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IAC9C;;;OAGG;IACH,oBAAoB,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC1C;;;OAGG;IACH,eAAe,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACtC;;;OAGG;IACH,mBAAmB,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IAC1C;;;OAGG;IACH,SAAS,CAAC,EAAE,OA
AO,GAAG,SAAS,CAAC;IAChC;;;OAGG;IACH,aAAa,CAAC,EAAE,YAAY,GAAG,SAAS,CAAC;IACzC;;;OAGG;IACH,YAAY,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACnC;;;OAGG;IACH,aAAa,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACpC;;;OAGG;IACH,gBAAgB,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACvC;;;OAGG;IACH,MAAM,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IAC7B;;;OAGG;IACH,aAAa,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IACnC;;;OAGG;IACH,aAAa,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IACnC;;;OAGG;IACH,SAAS,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;CAChC;AAED,MAAM,MAAM,UAAU,GACnB,SAAS,GACT,UAAU,GACV,iBAAiB,GACjB,UAAU,GACV,WAAW,GACX,UAAU,GACV,WAAW,GACX,YAAY,GACZ,YAAY,CAAC"}
|
||||
3
node_modules/pdf-parse/dist/pdf-parse/esm/LoadParameters.js
generated
vendored
Normal file
3
node_modules/pdf-parse/dist/pdf-parse/esm/LoadParameters.js
generated
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
/** biome-ignore-all lint/complexity/noBannedTypes: for underline types */
|
||||
export {};
|
||||
//# sourceMappingURL=LoadParameters.js.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/LoadParameters.js.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/LoadParameters.js.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"LoadParameters.js","sourceRoot":"","sources":["../../../src/pdf-parse/LoadParameters.ts"],"names":[],"mappings":"AAAA,0EAA0E"}
|
||||
95
node_modules/pdf-parse/dist/pdf-parse/esm/PDFParse.d.ts
generated
vendored
Normal file
95
node_modules/pdf-parse/dist/pdf-parse/esm/PDFParse.d.ts
generated
vendored
Normal file
@@ -0,0 +1,95 @@
|
||||
import { ImageResult } from './ImageResult.js';
|
||||
import { InfoResult } from './InfoResult.js';
|
||||
import type { LoadParameters } from './LoadParameters.js';
|
||||
import { type ParseParameters } from './ParseParameters.js';
|
||||
import { ScreenshotResult } from './ScreenshotResult.js';
|
||||
import { TableResult } from './TableResult.js';
|
||||
import { TextResult } from './TextResult.js';
|
||||
/**
 * @public
 * PDF loader exposing extraction helpers for text, images, tables, metadata and screenshots.
 */
export declare class PDFParse {
    private readonly options;
    private doc;
    progress: {
        loaded: number;
        total: number;
    };
    /**
     * Build a parser from `LoadParameters`.
     * Node.js `Buffer` data is converted to `Uint8Array` automatically and a default verbosity level is ensured.
     * @param options - Initialization parameters.
     */
    constructor(options: LoadParameters);
    destroy(): Promise<void>;
    static get isNodeJS(): boolean;
    static setWorker(workerSrc?: string): string;
    /**
     * Read document-level metadata (info, outline, permissions, page labels); per-page link details are optional.
     * @param params - Parse options; set `parsePageInfo` to collect the per-page metadata described in `ParseParameters`.
     * @returns An `InfoResult` aggregating the document metadata.
     */
    getInfo(params?: ParseParameters): Promise<InfoResult>;
    private getPageLinks;
    /**
     * Extract plain text from every requested page, optionally enriching hyperlinks and enforcing
     * line or cell separators.
     * @param params - Parse options controlling pagination, link handling, and line/cell thresholds.
     * @returns A `TextResult` with page-wise text plus one concatenated document string.
     */
    getText(params?: ParseParameters): Promise<TextResult>;
    private load;
    private shouldParse;
    private getPageText;
    private getHyperlinks;
    /**
     * Extract the images embedded in the requested pages.
     *
     * Behavior notes:
     * - Page selection follows ParseParameters (partial, first, last).
     * - Images smaller than `params.imageThreshold` (width OR height) are skipped.
     * - The returned ImageResult holds per-page PageImages; each image entry includes:
     *   - data: Uint8Array (present when params.imageBuffer === true)
     *   - dataUrl: string (present when params.imageDataUrl === true)
     *   - width, height, kind, name
     * - Works in both Node.js (canvas.toBuffer) and browser (canvas.toDataURL) environments.
     *
     * @param params - ParseParameters controlling page selection, thresholds and output format.
     * @returns Promise<ImageResult> with the extracted images grouped by page.
     */
    getImage(params?: ParseParameters): Promise<ImageResult>;
    private convertToRGBA;
    private resolveEmbeddedImage;
    /**
     * Render the requested pages to raster screenshots.
     *
     * Behavior notes:
     * - Page selection follows ParseParameters (partial, first, last).
     * - `params.scale` sets the zoom; `params.desiredWidth`, when specified, takes precedence.
     * - Each ScreenshotResult page contains:
     *   - data: Uint8Array (when params.imageBuffer === true)
     *   - dataUrl: string (when params.imageDataUrl === true)
     *   - pageNumber, width, height, scale
     * - Works in both Node.js (canvas.toBuffer) and browser (canvas.toDataURL) environments.
     *
     * @param parseParams - ParseParameters controlling page selection and render options.
     * @returns Promise<ScreenshotResult> with the rendered page images.
     */
    getScreenshot(parseParams?: ParseParameters): Promise<ScreenshotResult>;
    /**
     * Detect tables by analysing vector drawing operators, then fill their cells with text.
     *
     * Behavior notes:
     * - Operator lists are scanned for rectangles/lines forming table grids (uses PathGeometry and LineStore).
     * - Detected geometry is normalized and positioned text is matched to table cells.
     * - Page selection honors ParseParameters.
     *
     * @param params - ParseParameters controlling which pages to analyse (partial/first/last).
     * @returns Promise<TableResult> containing the tables discovered per page.
     */
    getTable(params?: ParseParameters): Promise<TableResult>;
    private getPathGeometry;
    private getPageTables;
    private fillPageTables;
}
|
||||
//# sourceMappingURL=PDFParse.d.ts.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/PDFParse.d.ts.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/PDFParse.d.ts.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"PDFParse.d.ts","sourceRoot":"","sources":["../../../src/pdf-parse/PDFParse.ts"],"names":[],"mappings":"AAQA,OAAO,EAAE,WAAW,EAAmB,MAAM,kBAAkB,CAAC;AAChE,OAAO,EAAE,UAAU,EAAuB,MAAM,iBAAiB,CAAC;AAClE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAC1D,OAAO,EAAE,KAAK,eAAe,EAA6B,MAAM,sBAAsB,CAAC;AAEvF,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AACzD,OAAO,EAAwB,WAAW,EAAE,MAAM,kBAAkB,CAAC;AACrE,OAAO,EAA0B,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAErE;;;GAGG;AACH,qBAAa,QAAQ;IACpB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAiB;IACzC,OAAO,CAAC,GAAG,CAA+B;IACnC,QAAQ,EAAE;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAA4B;IAE9E;;;;OAIG;gBACS,OAAO,EAAE,cAAc;IAYtB,OAAO;IAQpB,WAAkB,QAAQ,IAAI,OAAO,CAYpC;WAEa,SAAS,CAAC,SAAS,CAAC,EAAE,MAAM,GAAG,MAAM;IAsBnD;;;;OAIG;IACU,OAAO,CAAC,MAAM,GAAE,eAAoB,GAAG,OAAO,CAAC,UAAU,CAAC;YA6BzD,YAAY;IA2B1B;;;;OAIG;IACU,OAAO,CAAC,MAAM,GAAE,eAAoB,GAAG,OAAO,CAAC,UAAU,CAAC;YA6BzD,IAAI;IAkBlB,OAAO,CAAC,WAAW;YAwCL,WAAW;YAiFX,aAAa;IAkC3B;;;;;;;;;;;;;;OAcG;IACU,QAAQ,CAAC,MAAM,GAAE,eAAoB,GAAG,OAAO,CAAC,WAAW,CAAC;IA0GzE,OAAO,CAAC,aAAa;IAuErB,OAAO,CAAC,oBAAoB;IAqD5B;;;;;;;;;;;;;;OAcG;IACU,aAAa,CAAC,WAAW,GAAE,eAAoB,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAoGxF;;;;;;;;;;OAUG;IACU,QAAQ,CAAC,MAAM,GAAE,eAAoB,GAAG,OAAO,CAAC,WAAW,CAAC;IA4CzE,OAAO,CAAC,eAAe;YAmBT,aAAa;YAwJb,cAAc;CA0C5B"}
|
||||
827
node_modules/pdf-parse/dist/pdf-parse/esm/PDFParse.js
generated
vendored
Normal file
827
node_modules/pdf-parse/dist/pdf-parse/esm/PDFParse.js
generated
vendored
Normal file
@@ -0,0 +1,827 @@
|
||||
import * as pdfjs from 'pdfjs-dist/legacy/build/pdf.mjs';
|
||||
import { getException } from './Exception.js';
|
||||
import { Line, LineStore, Point, Rectangle } from './geometry/index.js';
|
||||
import { ImageResult } from './ImageResult.js';
|
||||
import { InfoResult } from './InfoResult.js';
|
||||
import { setDefaultParseParameters } from './ParseParameters.js';
|
||||
import { PathGeometry } from './PathGeometry.js';
|
||||
import { ScreenshotResult } from './ScreenshotResult.js';
|
||||
import { TableResult } from './TableResult.js';
|
||||
import { TextResult } from './TextResult.js';
|
||||
/**
|
||||
* @public
|
||||
* Loads PDF documents and exposes helpers for text, image, table, metadata, and screenshot extraction.
|
||||
*/
|
||||
export class PDFParse {
|
||||
options;
|
||||
doc;
|
||||
progress = { loaded: -1, total: 0 };
|
||||
/**
|
||||
* Create a new parser with `LoadParameters`.
|
||||
* Converts Node.js `Buffer` data to `Uint8Array` automatically and ensures a default verbosity level.
|
||||
* @param options - Initialization parameters.
|
||||
*/
|
||||
constructor(options) {
|
||||
if (options.verbosity === undefined) {
|
||||
options.verbosity = pdfjs.VerbosityLevel.ERRORS;
|
||||
}
|
||||
if (typeof Buffer !== 'undefined' && options.data instanceof Buffer) {
|
||||
options.data = new Uint8Array(options.data);
|
||||
}
|
||||
this.options = options;
|
||||
}
|
||||
async destroy() {
|
||||
if (this.doc) {
|
||||
await this.doc.destroy();
|
||||
this.doc = undefined;
|
||||
}
|
||||
}
|
||||
// biome-ignore-start lint/suspicious/noExplicitAny: unsupported underline type
|
||||
static get isNodeJS() {
|
||||
const isNodeJS = typeof process === 'object' &&
|
||||
`${process}` === '[object process]' &&
|
||||
!process.versions.nw &&
|
||||
!(process.versions.electron &&
|
||||
typeof process.type !== 'undefined' &&
|
||||
process.type !== 'browser');
|
||||
return isNodeJS;
|
||||
}
|
||||
static setWorker(workerSrc) {
|
||||
if (typeof globalThis.pdfjs === 'undefined') {
|
||||
globalThis.pdfjs = pdfjs;
|
||||
}
|
||||
if (pdfjs?.GlobalWorkerOptions === null)
|
||||
return '';
|
||||
if (workerSrc !== undefined) {
|
||||
pdfjs.GlobalWorkerOptions.workerSrc = workerSrc;
|
||||
return pdfjs.GlobalWorkerOptions.workerSrc;
|
||||
}
|
||||
// if (!PDFParse.isNodeJS) {
|
||||
// pdfjs.GlobalWorkerOptions.workerSrc =
|
||||
// 'https://cdn.jsdelivr.net/npm/pdf-parse@latest/dist/browser/pdf.worker.min.mjs';
|
||||
// return pdfjs.GlobalWorkerOptions.workerSrc;
|
||||
// }
|
||||
return pdfjs.GlobalWorkerOptions.workerSrc;
|
||||
}
|
||||
// biome-ignore-end lint/suspicious/noExplicitAny: unsupported underline type
|
||||
/**
|
||||
* Load document-level metadata (info, outline, permissions, page labels) and optionally gather per-page link details.
|
||||
* @param params - Parse options; set `parsePageInfo` to collect per-page metadata described in `ParseParameters`.
|
||||
* @returns Aggregated document metadata in an `InfoResult`.
|
||||
*/
|
||||
async getInfo(params = {}) {
|
||||
const doc = await this.load();
|
||||
const result = new InfoResult(doc.numPages);
|
||||
const { info, metadata } = await doc.getMetadata();
|
||||
result.info = info;
|
||||
result.metadata = metadata;
|
||||
result.fingerprints = doc.fingerprints;
|
||||
result.outline = await doc.getOutline();
|
||||
result.permission = await doc.getPermissions();
|
||||
const pageLabels = await doc.getPageLabels();
|
||||
if (params.parsePageInfo) {
|
||||
for (let i = 1; i <= result.total; i++) {
|
||||
if (this.shouldParse(i, result.total, params)) {
|
||||
const page = await doc.getPage(i);
|
||||
const pageLinkResult = await this.getPageLinks(page);
|
||||
pageLinkResult.pageLabel = pageLabels?.[page.pageNumber];
|
||||
result.pages.push(pageLinkResult);
|
||||
page.cleanup();
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
async getPageLinks(page) {
|
||||
const viewport = page.getViewport({ scale: 1 });
|
||||
const result = {
|
||||
pageNumber: page.pageNumber,
|
||||
links: [],
|
||||
width: viewport.width,
|
||||
height: viewport.height,
|
||||
};
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <unsupported underline type>
|
||||
const annotations = (await page.getAnnotations({ intent: 'display' })) || [];
|
||||
for (const i of annotations) {
|
||||
if (i.subtype !== 'Link')
|
||||
continue;
|
||||
const url = i.url ?? i.unsafeUrl;
|
||||
if (!url)
|
||||
continue;
|
||||
const text = i.overlaidText || '';
|
||||
result.links.push({ url, text });
|
||||
}
|
||||
return result;
|
||||
}
|
||||
/**
|
||||
* Extract plain text for each requested page, optionally enriching hyperlinks and enforcing line or cell separators.
|
||||
* @param params - Parse options controlling pagination, link handling, and line/cell thresholds.
|
||||
* @returns A `TextResult` containing page-wise text and a concatenated document string.
|
||||
*/
|
||||
async getText(params = {}) {
|
||||
const doc = await this.load();
|
||||
const result = new TextResult(doc.numPages);
|
||||
for (let i = 1; i <= result.total; i++) {
|
||||
if (this.shouldParse(i, result.total, params)) {
|
||||
const page = await doc.getPage(i);
|
||||
const text = await this.getPageText(page, params, result.total);
|
||||
result.pages.push({
|
||||
text: text,
|
||||
num: i,
|
||||
});
|
||||
page.cleanup();
|
||||
}
|
||||
}
|
||||
for (const page of result.pages) {
|
||||
if (params.pageJoiner) {
|
||||
let pageNumber = params.pageJoiner.replace('page_number', `${page.num}`);
|
||||
pageNumber = pageNumber.replace('total_number', `${result.total}`);
|
||||
result.text += `${page.text}\n${pageNumber}\n\n`;
|
||||
}
|
||||
else {
|
||||
result.text += `${page.text}\n\n`;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
async load() {
|
||||
try {
|
||||
if (this.doc === undefined) {
|
||||
const loadingTask = pdfjs.getDocument(this.options);
|
||||
loadingTask.onProgress = (progress) => {
|
||||
this.progress = progress;
|
||||
};
|
||||
this.doc = await loadingTask.promise;
|
||||
}
|
||||
return this.doc;
|
||||
}
|
||||
catch (error) {
|
||||
throw getException(error);
|
||||
}
|
||||
}
|
||||
shouldParse(currentPage, totalPage, params) {
|
||||
params.partial = params?.partial ?? [];
|
||||
params.first = params?.first ?? 0;
|
||||
params.last = params?.last ?? 0;
|
||||
// parse specific pages
|
||||
if (params.partial.length > 0) {
|
||||
if (params.partial.includes(currentPage)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// parse pagest beetween first..last
|
||||
if (params.first > 0 && params.last > 0) {
|
||||
if (currentPage >= params.first && currentPage <= params.last) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// parse first x page
|
||||
if (params.first > 0) {
|
||||
if (currentPage <= params.first) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// parse last x page
|
||||
if (params.last > 0) {
|
||||
if (currentPage > totalPage - params.last) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
async getPageText(page, parseParams, total) {
|
||||
const viewport = page.getViewport({ scale: 1 });
|
||||
const params = setDefaultParseParameters(parseParams);
|
||||
const textContent = await page.getTextContent({
|
||||
includeMarkedContent: !!params.includeMarkedContent,
|
||||
disableNormalization: !!params.disableNormalization,
|
||||
});
|
||||
let links = new Map();
|
||||
if (params.parseHyperlinks) {
|
||||
links = await this.getHyperlinks(page, viewport);
|
||||
}
|
||||
const strBuf = [];
|
||||
let lastX;
|
||||
let lastY;
|
||||
let lineHeight = 0;
|
||||
for (const item of textContent.items) {
|
||||
if (!('str' in item))
|
||||
continue;
|
||||
const tm = item.transform ?? item.transform;
|
||||
const [x, y] = viewport.convertToViewportPoint(tm[4], tm[5]);
|
||||
if (params.parseHyperlinks) {
|
||||
const posArr = links.get(item.str) || [];
|
||||
const hit = posArr.find((l) => x >= l.rect.left && x <= l.rect.right && y >= l.rect.top && y <= l.rect.bottom);
|
||||
if (hit) {
|
||||
item.str = `[${item.str}](${hit.url})`;
|
||||
}
|
||||
}
|
||||
if (params.lineEnforce) {
|
||||
if (lastY !== undefined && Math.abs(lastY - y) > params.lineThreshold) {
|
||||
const lastItem = strBuf.length ? strBuf[strBuf.length - 1] : undefined;
|
||||
const isCurrentItemHasNewLine = item.str.startsWith('\n') || (item.str.trim() === '' && item.hasEOL);
|
||||
if (lastItem?.endsWith('\n') === false && !isCurrentItemHasNewLine) {
|
||||
const ydiff = Math.abs(lastY - y);
|
||||
if (ydiff - 1 > lineHeight) {
|
||||
strBuf.push('\n');
|
||||
lineHeight = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (params.cellSeparator) {
|
||||
if (lastY !== undefined && Math.abs(lastY - y) < params.lineThreshold) {
|
||||
if (lastX !== undefined && Math.abs(lastX - x) > params.cellThreshold) {
|
||||
item.str = `${params.cellSeparator}${item.str}`;
|
||||
}
|
||||
}
|
||||
}
|
||||
strBuf.push(item.str);
|
||||
lastX = x + item.width;
|
||||
lastY = y;
|
||||
lineHeight = Math.max(lineHeight, item.height);
|
||||
if (item.hasEOL) {
|
||||
strBuf.push('\n');
|
||||
}
|
||||
if (item.hasEOL || item.str.endsWith('\n')) {
|
||||
lineHeight = 0;
|
||||
}
|
||||
}
|
||||
if (params.itemJoiner) {
|
||||
return strBuf.join(params.itemJoiner);
|
||||
}
|
||||
return strBuf.join('');
|
||||
}
|
||||
async getHyperlinks(page, viewport) {
|
||||
const result = new Map();
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <unsupported underline type>
|
||||
const annotations = (await page.getAnnotations({ intent: 'display' })) || [];
|
||||
for (const i of annotations) {
|
||||
if (i.subtype !== 'Link')
|
||||
continue;
|
||||
const url = i.url ?? i.unsafeUrl;
|
||||
if (!url)
|
||||
continue;
|
||||
const text = i.overlaidText;
|
||||
if (!text)
|
||||
continue;
|
||||
const rectVp = viewport.convertToViewportRectangle(i.rect);
|
||||
const left = Math.min(rectVp[0], rectVp[2]) - 0.5;
|
||||
const top = Math.min(rectVp[1], rectVp[3]) - 0.5;
|
||||
const right = Math.max(rectVp[0], rectVp[2]) + 0.5;
|
||||
const bottom = Math.max(rectVp[1], rectVp[3]) + 0.5;
|
||||
const pos = { rect: { left, top, right, bottom }, url, text, used: false };
|
||||
const el = result.get(text);
|
||||
if (el) {
|
||||
el.push(pos);
|
||||
}
|
||||
else {
|
||||
result.set(text, [pos]);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
/**
|
||||
* Extract embedded images from requested pages.
|
||||
*
|
||||
* Behavior notes:
|
||||
* - Pages are selected according to ParseParameters (partial, first, last).
|
||||
* - Images smaller than `params.imageThreshold` (width OR height) are skipped.
|
||||
* - Returned ImageResult contains per-page PageImages; each image entry includes:
|
||||
* - data: Uint8Array (present when params.imageBuffer === true)
|
||||
* - dataUrl: string (present when params.imageDataUrl === true)
|
||||
* - width, height, kind, name
|
||||
* - Works in both Node.js (canvas.toBuffer) and browser (canvas.toDataURL) environments.
|
||||
*
|
||||
* @param params - ParseParameters controlling page selection, thresholds and output format.
|
||||
* @returns Promise<ImageResult> with extracted images grouped by page.
|
||||
*/
|
||||
async getImage(params = {}) {
|
||||
const doc = await this.load();
|
||||
const result = new ImageResult(doc.numPages);
|
||||
setDefaultParseParameters(params);
|
||||
for (let i = 1; i <= result.total; i++) {
|
||||
if (this.shouldParse(i, result.total, params)) {
|
||||
const page = await doc.getPage(i);
|
||||
const ops = await page.getOperatorList();
|
||||
const pageImages = { pageNumber: i, images: [] };
|
||||
result.pages.push(pageImages);
|
||||
for (let j = 0; j < ops.fnArray.length; j++) {
|
||||
if (ops.fnArray[j] === pdfjs.OPS.paintInlineImageXObject || ops.fnArray[j] === pdfjs.OPS.paintImageXObject) {
|
||||
const name = ops.argsArray[j][0];
|
||||
const isCommon = page.commonObjs.has(name);
|
||||
const imgPromise = isCommon
|
||||
? this.resolveEmbeddedImage(page.commonObjs, name)
|
||||
: this.resolveEmbeddedImage(page.objs, name);
|
||||
const { width, height, kind, data } = await imgPromise;
|
||||
if (params.imageThreshold) {
|
||||
if (params.imageThreshold >= width || params.imageThreshold >= height) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <underlying library does not contain valid typedefs>
|
||||
const canvasFactory = doc.canvasFactory;
|
||||
const canvasAndContext = canvasFactory.create(width, height);
|
||||
const context = canvasAndContext.context;
|
||||
let imgData = null;
|
||||
if (kind === pdfjs.ImageKind.RGBA_32BPP) {
|
||||
imgData = context.createImageData(width, height);
|
||||
imgData.data.set(data);
|
||||
}
|
||||
else {
|
||||
imgData = context.createImageData(width, height);
|
||||
this.convertToRGBA({
|
||||
src: data,
|
||||
dest: new Uint32Array(imgData.data.buffer),
|
||||
width,
|
||||
height,
|
||||
kind,
|
||||
});
|
||||
}
|
||||
context.putImageData(imgData, 0, 0);
|
||||
// Browser and Node.js compatibility
|
||||
let buffer = new Uint8Array();
|
||||
let dataUrl = '';
|
||||
if (typeof canvasAndContext.canvas.toBuffer === 'function') {
|
||||
// Node.js environment (canvas package)
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <underline lib not support>
|
||||
let nodeBuffer;
|
||||
if (params.imageBuffer) {
|
||||
nodeBuffer = canvasAndContext.canvas.toBuffer('image/png');
|
||||
buffer = new Uint8Array(nodeBuffer);
|
||||
}
|
||||
if (params.imageDataUrl) {
|
||||
if (nodeBuffer) {
|
||||
dataUrl = `data:image/png;base64,${nodeBuffer.toString('base64')}`;
|
||||
}
|
||||
else {
|
||||
nodeBuffer = canvasAndContext.canvas.toBuffer('image/png');
|
||||
buffer = new Uint8Array(nodeBuffer);
|
||||
dataUrl = `data:image/png;base64,${nodeBuffer.toString('base64')}`;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Browser environment
|
||||
if (params.imageBuffer) {
|
||||
const imageData = canvasAndContext.context.getImageData(0, 0, canvasAndContext.canvas.width, canvasAndContext.canvas.height);
|
||||
buffer = new Uint8Array(imageData.data);
|
||||
}
|
||||
if (params.imageDataUrl) {
|
||||
dataUrl = canvasAndContext.canvas.toDataURL('image/png');
|
||||
}
|
||||
}
|
||||
pageImages.images.push({
|
||||
data: buffer,
|
||||
dataUrl,
|
||||
name,
|
||||
height,
|
||||
width,
|
||||
kind,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
convertToRGBA({ src, dest, width, height, kind, }) {
|
||||
if (kind === pdfjs.ImageKind.RGB_24BPP) {
|
||||
// RGB 24-bit per pixel
|
||||
for (let i = 0, j = 0; i < src.length; i += 3, j++) {
|
||||
const r = src[i];
|
||||
const g = src[i + 1];
|
||||
const b = src[i + 2];
|
||||
dest[j] = (255 << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
else if (kind === pdfjs.ImageKind.GRAYSCALE_1BPP) {
|
||||
// Grayscale 1-bit per pixel
|
||||
let pixelIndex = 0;
|
||||
for (let i = 0; i < src.length; i++) {
|
||||
const byte = src[i];
|
||||
for (let bit = 7; bit >= 0; bit--) {
|
||||
if (pixelIndex >= width * height)
|
||||
break;
|
||||
const isWhite = ((byte >> bit) & 1) === 1;
|
||||
const gray = isWhite ? 255 : 0;
|
||||
dest[pixelIndex++] = (255 << 24) | (gray << 16) | (gray << 8) | gray;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (kind === undefined || kind === null) {
|
||||
// Unknown or undefined kind - try to infer from data length
|
||||
const bytesPerPixel = src.length / (width * height);
|
||||
if (Math.abs(bytesPerPixel - 3) < 0.1) {
|
||||
// Likely RGB 24BPP
|
||||
for (let i = 0, j = 0; i < src.length; i += 3, j++) {
|
||||
const r = src[i];
|
||||
const g = src[i + 1];
|
||||
const b = src[i + 2];
|
||||
dest[j] = (255 << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
else if (Math.abs(bytesPerPixel - 4) < 0.1) {
|
||||
// Likely RGBA 32BPP
|
||||
for (let i = 0, j = 0; i < src.length; i += 4, j++) {
|
||||
const r = src[i];
|
||||
const g = src[i + 1];
|
||||
const b = src[i + 2];
|
||||
const a = src[i + 3];
|
||||
dest[j] = (a << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
else if (Math.abs(bytesPerPixel - 1) < 0.1) {
|
||||
// Likely grayscale 8BPP
|
||||
for (let i = 0; i < src.length; i++) {
|
||||
const gray = src[i];
|
||||
dest[i] = (255 << 24) | (gray << 16) | (gray << 8) | gray;
|
||||
}
|
||||
}
|
||||
else {
|
||||
throw new Error(`convertToRGBA: Cannot infer image format. kind: ${kind}, bytesPerPixel: ${bytesPerPixel}, width: ${width}, height: ${height}, dataLength: ${src.length}`);
|
||||
}
|
||||
}
|
||||
else {
|
||||
throw new Error(`convertToRGBA: Unsupported image kind: ${kind}. Available kinds: GRAYSCALE_1BPP=${pdfjs.ImageKind.GRAYSCALE_1BPP}, RGB_24BPP=${pdfjs.ImageKind.RGB_24BPP}, RGBA_32BPP=${pdfjs.ImageKind.RGBA_32BPP}`);
|
||||
}
|
||||
}
|
||||
resolveEmbeddedImage(pdfObjects, name) {
|
||||
return new Promise((resolve, reject) => {
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <underlying library does not contain valid typedefs>
|
||||
pdfObjects.get(name, (imgData) => {
|
||||
if (imgData) {
|
||||
// Check different possible data sources
|
||||
let dataBuff;
|
||||
if (imgData.data instanceof Uint8Array) {
|
||||
dataBuff = imgData.data;
|
||||
}
|
||||
else if (imgData.data instanceof Uint8ClampedArray) {
|
||||
dataBuff = new Uint8Array(imgData.data);
|
||||
}
|
||||
else if (imgData.data?.buffer) {
|
||||
// Typed array with buffer
|
||||
dataBuff = new Uint8Array(imgData.data.buffer);
|
||||
}
|
||||
else if (imgData.bitmap) {
|
||||
// Some browsers might use bitmap
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <underlying library does not contain valid typedefs>
|
||||
const canvasFactory = this.doc.canvasFactory;
|
||||
const canvasAndContext = canvasFactory.create(imgData.bitmap.width, imgData.bitmap.height);
|
||||
canvasAndContext.context.drawImage(imgData.bitmap, 0, 0);
|
||||
const imageData = canvasAndContext.context.getImageData(0, 0, imgData.bitmap.width, imgData.bitmap.height);
|
||||
dataBuff = new Uint8Array(imageData.data.buffer);
|
||||
}
|
||||
else if (ArrayBuffer.isView(imgData.data)) {
|
||||
// Generic typed array
|
||||
dataBuff = new Uint8Array(imgData.data.buffer, imgData.data.byteOffset, imgData.data.byteLength);
|
||||
}
|
||||
if (!dataBuff) {
|
||||
reject(new Error(`Image object ${name}: data field is empty or invalid. Available fields: ${Object.keys(imgData).join(', ')}`));
|
||||
return;
|
||||
}
|
||||
if (dataBuff.length === 0) {
|
||||
reject(new Error(`Image object ${name}: data buffer is empty (length: 0)`));
|
||||
return;
|
||||
}
|
||||
resolve({ width: imgData.width, height: imgData.height, kind: imgData.kind, data: dataBuff });
|
||||
}
|
||||
else {
|
||||
reject(new Error(`Image object ${name} not found`));
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
/**
|
||||
* Render pages to raster screenshots.
|
||||
*
|
||||
* Behavior notes:
|
||||
* - Pages are selected according to ParseParameters (partial, first, last).
|
||||
* - Use params.scale for zoom; if params.desiredWidth is specified it takes precedence.
|
||||
* - Each ScreenshotResult page contains:
|
||||
* - data: Uint8Array (when params.imageBuffer === true)
|
||||
* - dataUrl: string (when params.imageDataUrl === true)
|
||||
* - pageNumber, width, height, scale
|
||||
* - Works in both Node.js (canvas.toBuffer) and browser (canvas.toDataURL) environments.
|
||||
*
|
||||
* @param parseParams - ParseParameters controlling page selection and render options.
|
||||
* @returns Promise<ScreenshotResult> with rendered page images.
|
||||
*/
|
||||
async getScreenshot(parseParams = {}) {
|
||||
//const base = new URL('../../node_modules/pdfjs-dist/', import.meta.url);
|
||||
//this.options.cMapUrl = new URL('cmaps/', base).href;
|
||||
//this.options.cMapPacked = true;
|
||||
//this.options.standardFontDataUrl = new URL('legacy/build/standard_fonts/', base).href;
|
||||
const params = setDefaultParseParameters(parseParams);
|
||||
const doc = await this.load();
|
||||
const result = new ScreenshotResult(doc.numPages);
|
||||
if (this.doc === undefined) {
|
||||
throw new Error('PDF document not loaded');
|
||||
}
|
||||
for (let i = 1; i <= result.total; i++) {
|
||||
if (this.shouldParse(i, result.total, params)) {
|
||||
const page = await this.doc.getPage(i);
|
||||
let viewport = page.getViewport({ scale: params.scale });
|
||||
if (params.desiredWidth) {
|
||||
viewport = page.getViewport({ scale: 1 });
|
||||
// desiredWidth
|
||||
const scale = params.desiredWidth / viewport.width;
|
||||
viewport = page.getViewport({ scale: scale });
|
||||
}
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <underlying library does not contain valid typedefs>
|
||||
const canvasFactory = this.doc.canvasFactory;
|
||||
const canvasAndContext = canvasFactory.create(viewport.width, viewport.height);
|
||||
const renderContext = {
|
||||
canvasContext: canvasAndContext.context,
|
||||
viewport,
|
||||
canvas: canvasAndContext.canvas,
|
||||
};
|
||||
const renderTask = page.render(renderContext);
|
||||
await renderTask.promise;
|
||||
// Convert the canvas to an image buffer.
|
||||
let data = new Uint8Array();
|
||||
let dataUrl = '';
|
||||
if (typeof canvasAndContext.canvas.toBuffer === 'function') {
|
||||
// Node.js environment (canvas package)
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <underline lib not support>
|
||||
let nodeBuffer;
|
||||
if (params.imageBuffer) {
|
||||
nodeBuffer = canvasAndContext.canvas.toBuffer('image/png');
|
||||
data = new Uint8Array(nodeBuffer);
|
||||
}
|
||||
if (params.imageDataUrl) {
|
||||
if (nodeBuffer) {
|
||||
dataUrl = `data:image/png;base64,${nodeBuffer.toString('base64')}`;
|
||||
}
|
||||
else {
|
||||
nodeBuffer = canvasAndContext.canvas.toBuffer('image/png');
|
||||
data = new Uint8Array(nodeBuffer);
|
||||
dataUrl = `data:image/png;base64,${nodeBuffer.toString('base64')}`;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Browser environment
|
||||
if (params.imageBuffer) {
|
||||
const imageData = canvasAndContext.context.getImageData(0, 0, canvasAndContext.canvas.width, canvasAndContext.canvas.height);
|
||||
data = new Uint8Array(imageData.data);
|
||||
}
|
||||
if (params.imageDataUrl) {
|
||||
dataUrl = canvasAndContext.canvas.toDataURL('image/png');
|
||||
//const base64 = dataUrl.split(',')[1];
|
||||
//const binaryString = atob(base64);
|
||||
//data = new Uint8Array(binaryString.length);
|
||||
//for (let i = 0; i < binaryString.length; i++) {
|
||||
// data[i] = binaryString.charCodeAt(i);
|
||||
//}
|
||||
}
|
||||
}
|
||||
result.pages.push({
|
||||
data,
|
||||
dataUrl,
|
||||
pageNumber: i,
|
||||
width: viewport.width,
|
||||
height: viewport.height,
|
||||
scale: viewport.scale,
|
||||
});
|
||||
page.cleanup();
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
/**
|
||||
* Detect and extract tables from pages by analysing vector drawing operators, then populate cells with text.
|
||||
*
|
||||
* Behavior notes:
|
||||
* - Scans operator lists for rectangles/lines that form table grids (uses PathGeometry and LineStore).
|
||||
* - Normalizes detected geometry and matches positioned text to table cells.
|
||||
* - Honors ParseParameters for page selection.
|
||||
*
|
||||
* @param params - ParseParameters controlling which pages to analyse (partial/first/last).
|
||||
* @returns Promise<TableResult> containing discovered tables per page.
|
||||
*/
|
||||
async getTable(params = {}) {
|
||||
const doc = await this.load();
|
||||
const result = new TableResult(doc.numPages);
|
||||
if (this.doc === undefined) {
|
||||
throw new Error('PDF document not loaded');
|
||||
}
|
||||
for (let i = 1; i <= result.total; i++) {
|
||||
if (this.shouldParse(i, result.total, params)) {
|
||||
const page = await this.doc.getPage(i);
|
||||
//const viewport = page.getViewport({ scale: 1 });
|
||||
//viewport.convertToViewportPoint(0, 0);
|
||||
const store = await this.getPageTables(page);
|
||||
//const store = await this.getPageGeometry(page);
|
||||
store.normalize();
|
||||
const tableDataArr = store.getTableData();
|
||||
await this.fillPageTables(page, tableDataArr);
|
||||
const pageTableResult = { num: i, tables: [] };
|
||||
for (const table of tableDataArr) {
|
||||
//if (table.cellCount < 3) continue
|
||||
pageTableResult.tables.push(table.toArray());
|
||||
//const pageTableResult: PageTableResult = { num: i, tables: table.toArray() };
|
||||
//pageTableResult.tables.push(table.toData())
|
||||
}
|
||||
result.pages.push(pageTableResult);
|
||||
page.cleanup();
|
||||
}
|
||||
}
|
||||
// for (const table of Table.AllTables) {
|
||||
// if (table.cellCount < 3) continue
|
||||
// const str = table.toString()
|
||||
// console.log(str)
|
||||
// }
|
||||
return result;
|
||||
}
|
||||
getPathGeometry(mm) {
|
||||
const width = mm[2] - mm[0];
|
||||
const height = mm[3] - mm[1];
|
||||
if (mm[0] === Infinity) {
|
||||
return PathGeometry.undefined;
|
||||
}
|
||||
if (width > 5 && height > 5) {
|
||||
return PathGeometry.rectangle;
|
||||
}
|
||||
else if (width > 5 && height === 0) {
|
||||
return PathGeometry.hline;
|
||||
}
|
||||
else if (width === 0 && height > 5) {
|
||||
return PathGeometry.vline;
|
||||
}
|
||||
return PathGeometry.undefined;
|
||||
}
|
||||
async getPageTables(page) {
|
||||
const lineStore = new LineStore();
|
||||
const viewport = page.getViewport({ scale: 1 });
|
||||
let transformMatrix = [1, 0, 0, 1, 0, 0];
|
||||
const transformStack = [];
|
||||
const opList = await page.getOperatorList();
|
||||
for (let i = 0; i < opList.fnArray.length; i++) {
|
||||
const fn = opList.fnArray[i];
|
||||
const args = opList.argsArray[i];
|
||||
const op = args?.[0] ?? 0;
|
||||
const mm = args?.[2] ?? [Infinity, Infinity, -Infinity, -Infinity];
|
||||
//const minMax = new Float32Array([Infinity, Infinity, -Infinity, -Infinity]);
|
||||
if (fn === pdfjs.OPS.constructPath) {
|
||||
if (op === pdfjs.OPS.fill) {
|
||||
//debugger;
|
||||
}
|
||||
if (op !== pdfjs.OPS.stroke) {
|
||||
continue;
|
||||
}
|
||||
const pg = this.getPathGeometry(mm);
|
||||
if (pg === PathGeometry.rectangle) {
|
||||
const rect = new Rectangle(new Point(mm[0], mm[1]), mm[2] - mm[0], mm[3] - mm[1]);
|
||||
rect.transform(transformMatrix);
|
||||
rect.transform(viewport.transform);
|
||||
lineStore.addRectangle(rect);
|
||||
}
|
||||
else if (pg === PathGeometry.hline || pg === PathGeometry.vline) {
|
||||
const from = new Point(mm[0], mm[1]);
|
||||
const to = new Point(mm[2], mm[3]);
|
||||
const line = new Line(from, to);
|
||||
line.transform(transformMatrix);
|
||||
line.transform(viewport.transform);
|
||||
lineStore.add(line);
|
||||
}
|
||||
else {
|
||||
//debugger;
|
||||
}
|
||||
// if (op === pdfjs.OPS.rectangle) {
|
||||
// debugger;
|
||||
// } else if (op === pdfjs.OPS.moveTo) {
|
||||
// debugger;
|
||||
// } else if (op === pdfjs.OPS.lineTo) {
|
||||
// debugger;
|
||||
// } else if (op === pdfjs.OPS.endPath) {
|
||||
// const combinedMatrix = pdfjs.Util.transform(viewport.transform, transformMatrix);
|
||||
// // while (args[1].length) {
|
||||
// // const drawOp = args[1].shift();
|
||||
// // debugger;
|
||||
// // }
|
||||
// } else {
|
||||
// //debugger;
|
||||
// }
|
||||
}
|
||||
else if (fn === pdfjs.OPS.setLineWidth) {
|
||||
//debugger;
|
||||
}
|
||||
else if (fn === pdfjs.OPS.save) {
|
||||
transformStack.push(transformMatrix);
|
||||
}
|
||||
else if (fn === pdfjs.OPS.restore) {
|
||||
const restoredMatrix = transformStack.pop();
|
||||
if (restoredMatrix) {
|
||||
transformMatrix = restoredMatrix;
|
||||
}
|
||||
}
|
||||
else if (fn === pdfjs.OPS.transform) {
|
||||
//transformMatrix = this.transform_fn(transformMatrix, args);
|
||||
transformMatrix = pdfjs.Util.transform(transformMatrix, args);
|
||||
}
|
||||
}
|
||||
return lineStore;
|
||||
}
|
||||
// private async getPageGeometry(page: PDFPageProxy): Promise<LineStore> {
|
||||
// const lineStore: LineStore = new LineStore();
|
||||
// const opList = await page.getOperatorList();
|
||||
// const viewport = page.getViewport({ scale: 1 });
|
||||
// let transformMatrix = [1, 0, 0, 1, 0, 0];
|
||||
// const transformStack: Array<Array<number>> = [];
|
||||
// let current_x: number = 0;
|
||||
// let current_y: number = 0;
|
||||
// for (let j = 0; j < opList.fnArray.length; j++) {
|
||||
// const fn = opList.fnArray[j];
|
||||
// const args = opList.argsArray[j];
|
||||
// if (fn === pdfjs.OPS.constructPath) {
|
||||
// while (args[0].length) {
|
||||
// const op = args[0].shift();
|
||||
// const combinedMatrix = pdfjs.Util.transform(viewport.transform, transformMatrix);
|
||||
// if (op === pdfjs.OPS.rectangle) {
|
||||
// const x = args[1].shift();
|
||||
// const y = args[1].shift();
|
||||
// const width = args[1].shift();
|
||||
// const height = args[1].shift();
|
||||
// if (Math.min(width, height) <= 2) {
|
||||
// // TODO remove
|
||||
// debugger;
|
||||
// }
|
||||
// const rect = new Rectangle(new Point(x, y), width, height);
|
||||
// rect.transform(combinedMatrix);
|
||||
// //rect.transform(viewport.transform);
|
||||
// lineStore.addRectangle(rect);
|
||||
// } else if (op === pdfjs.OPS.moveTo) {
|
||||
// current_x = args[1].shift();
|
||||
// current_y = args[1].shift();
|
||||
// } else if (op === pdfjs.OPS.lineTo) {
|
||||
// const x = args[1].shift();
|
||||
// const y = args[1].shift();
|
||||
// //default trasform
|
||||
// const from = new Point(current_x, current_y);
|
||||
// const to = new Point(x, y);
|
||||
// const line = new Line(from, to);
|
||||
// line.transform(combinedMatrix);
|
||||
// //line.transform(viewport.transform);
|
||||
// // // viewport transform
|
||||
// // const _from = viewport.convertToViewportPoint(line.from.x, line.from.y)
|
||||
// // const _to = viewport.convertToViewportPoint(line.to.x, line.to.y)
|
||||
// //
|
||||
// // const transformedLine = new Line(new Point(_from[0], _from[1]), new Point(_to[0], _to[1]))
|
||||
// lineStore.add(line);
|
||||
// current_x = x;
|
||||
// current_y = y;
|
||||
// }
|
||||
// }
|
||||
// } else if (fn === pdfjs.OPS.save) {
|
||||
// transformStack.push(transformMatrix);
|
||||
// } else if (fn === pdfjs.OPS.restore) {
|
||||
// const restoredMatrix = transformStack.pop();
|
||||
// if (restoredMatrix) {
|
||||
// transformMatrix = restoredMatrix;
|
||||
// }
|
||||
// } else if (fn === pdfjs.OPS.transform) {
|
||||
// //transformMatrix = this.transform_fn(transformMatrix, args);
|
||||
// transformMatrix = pdfjs.Util.transform(transformMatrix, args);
|
||||
// }
|
||||
// }
|
||||
// return lineStore;
|
||||
// }
|
||||
async fillPageTables(page, pageTables) {
|
||||
//const resultTable: Array<Table> = []
|
||||
const viewport = page.getViewport({ scale: 1 });
|
||||
// for (let i = 0; i < pageTables.length; i++) {
|
||||
// const currentTable = pageTables[i]
|
||||
// }
|
||||
//pageTables = pageTables.filter((table) => table.cellCount > 3)
|
||||
const textContent = await page.getTextContent({
|
||||
includeMarkedContent: false,
|
||||
disableNormalization: false,
|
||||
});
|
||||
for (const textItem of textContent.items) {
|
||||
if (!('str' in textItem))
|
||||
continue;
|
||||
const tx = pdfjs.Util.transform(pdfjs.Util.transform(viewport.transform, textItem.transform), [1, 0, 0, -1, 0, 0]);
|
||||
//const resXY = viewport.convertToViewportPoint(tx[4], tx[5]);
|
||||
// textItem.transform = pdfjs.Util.transform(viewport.transform, textItem.transform)
|
||||
// textItem.transform[5] = viewport.height - textItem.transform[5] - textItem.height
|
||||
for (const pageTable of pageTables) {
|
||||
const cell = pageTable.findCell(tx[4], tx[5]);
|
||||
if (cell) {
|
||||
cell.text.push(textItem.str);
|
||||
if (textItem.hasEOL) {
|
||||
cell.text.push('\n');
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
//Table.tryAddText(pageTables, textItem)
|
||||
}
|
||||
}
|
||||
}
|
||||
//PDFParse.setWorker();
|
||||
//# sourceMappingURL=PDFParse.js.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/PDFParse.js.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/PDFParse.js.map
generated
vendored
Normal file
File diff suppressed because one or more lines are too long
127
node_modules/pdf-parse/dist/pdf-parse/esm/ParseParameters.d.ts
generated
vendored
Normal file
127
node_modules/pdf-parse/dist/pdf-parse/esm/ParseParameters.d.ts
generated
vendored
Normal file
@@ -0,0 +1,127 @@
|
||||
/**
 * @public
 * ParseParameters
 * Options that steer page selection, text layout and image output of the parser.
 */
export interface ParseParameters {
    /**
     * Explicit list of page numbers to parse, e.g. `[1, 3, 5]`, or `[7]` for a single page.
     * Only the listed pages are parsed, and they are returned in the same order.
     * Default: `undefined`.
     */
    partial?: number[];
    /**
     * Parse the first N pages (pages 1..N).
     * Has no effect when `partial` is given. When `last` is also set, the pair is read as
     * an explicit inclusive range (first..last) instead of "first N".
     * Default: `undefined`.
     */
    first?: number;
    /**
     * Parse the last N pages (pages total-N+1..total).
     * Has no effect when `partial` is given. When `first` is also set, the pair is read as
     * an explicit inclusive range (first..last) instead of "last N".
     * Default: `undefined`.
     */
    last?: number;
    /**
     * Gather per-page metadata (embedded links, title, pageLabel, dimensions) when
     * getInfo() is used; ISBN, DOI, abstract and references are still work in progress.
     * Default: `false`.
     */
    parsePageInfo?: boolean;
    /**
     * Try to detect hyperlink annotations (e.g. URLs) attached to text and emit them as
     * Markdown inline links, for example `[text](https://example.com)`.
     * Default: `false`.
     */
    parseHyperlinks?: boolean;
    /**
     * Insert a newline whenever the vertical distance between text items exceeds
     * `lineThreshold`, which helps keep line/paragraph structure when text arrives as
     * separate segments.
     * Default: `true`.
     */
    lineEnforce?: boolean;
    /**
     * Threshold deciding whether neighbouring text items sit on different lines.
     * Larger values make the parser more likely to start a new line between items.
     * Default: `4.6`.
     */
    lineThreshold?: number;
    /**
     * Text placed between items on the same line when a sufficiently large horizontal
     * gap is found; typically a cell/column separator such as a tab.
     * Default: `'\t'`.
     */
    cellSeparator?: string;
    /**
     * Horizontal distance threshold deciding when two items on the same baseline count
     * as separate cells. Larger values give fewer (wider) cells, smaller values more breaks.
     * Default: `7`.
     */
    cellThreshold?: number;
    /**
     * Marker appended after each page's text to show page boundaries. The placeholders
     * `page_number` and `total_number` are substituted. Omitted or empty: no marker.
     * Default: `'\n-- page_number of total_number --'`.
     */
    pageJoiner?: string;
    /**
     * String used to join the text items of a page, replacing the default
     * empty-string join.
     * Default: `undefined`.
     */
    itemJoiner?: string;
    /**
     * Minimum image dimension in pixels. Images whose width OR height is less than or
     * equal to this value are skipped by `getImage()`; useful to drop tiny decorative
     * or tracking images.
     * Default: `80`.
     * Disable: `0`.
     */
    imageThreshold?: number;
    /**
     * Screenshot scale factor: 1 keeps the original size, 1.5 is 50% larger, and so on.
     * Default: `1`.
     */
    scale?: number;
    /**
     * Target screenshot width in pixels. When set, `scale` is ignored.
     * Default: `undefined`.
     */
    desiredWidth?: number;
    /**
     * For getImage() and getScreenshot(): include the image as a base64 data URL string.
     * Default: `true`.
     */
    imageDataUrl?: boolean;
    /**
     * For getImage() and getScreenshot(): include the image as a binary buffer.
     * Default: `true`.
     */
    imageBuffer?: boolean;
    /**
     * Add marked-content items to the TextContent items array. This exposes tags
     * (MCID, role/props) and structural/accessibility information for mapping
     * text ↔ structure, at the cost of larger output; usually false for plain text.
     * Default: `false`.
     */
    includeMarkedContent?: boolean;
    /**
     * When true, text normalization is NOT done in the worker thread. For plain text
     * extraction, leaving normalization in the worker (false) is usually recommended.
     * Default: `false`.
     */
    disableNormalization?: boolean;
}
|
||||
/**
 * @public
 * SafeParseParameters
 * ParseParameters in which `lineThreshold`, `cellThreshold` and `scale` are
 * guaranteed to be present.
 */
export type SafeParseParameters = ParseParameters & Required<Pick<ParseParameters, 'lineThreshold' | 'cellThreshold' | 'scale'>>;
|
||||
/**
 * Fills in default values for parsing options that are unset and returns the
 * parameters typed as SafeParseParameters.
 *
 * @param params - Options to complete.
 * @returns The completed options.
 */
export declare function setDefaultParseParameters(params: ParseParameters): SafeParseParameters;
|
||||
//# sourceMappingURL=ParseParameters.d.ts.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/ParseParameters.d.ts.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/ParseParameters.d.ts.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"ParseParameters.d.ts","sourceRoot":"","sources":["../../../src/pdf-parse/ParseParameters.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AACH,MAAM,WAAW,eAAe;IAC/B;;;;;OAKG;IACH,OAAO,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAExB;;;;;OAKG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IAEf;;;;;OAKG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IAEd;;;;OAIG;IACH,aAAa,CAAC,EAAE,OAAO,CAAC;IAExB;;;;OAIG;IACH,eAAe,CAAC,EAAE,OAAO,CAAC;IAE1B;;;;;OAKG;IACH,WAAW,CAAC,EAAE,OAAO,CAAC;IAEtB;;;;OAIG;IACH,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB;;;;OAIG;IACH,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB;;;;OAIG;IACH,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB;;;;;OAKG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;IAEpB;;;;OAIG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;IAEpB;;;;;;OAMG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IAExB;;;OAGG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IAEf;;;;OAIG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB;;;OAGG;IACH,YAAY,CAAC,EAAE,OAAO,CAAC;IAEvB;;;OAGG;IACH,WAAW,CAAC,EAAE,OAAO,CAAC;IAEtB;;;;;OAKG;IACH,oBAAoB,CAAC,EAAE,OAAO,CAAC;IAE/B;;;;OAIG;IACH,oBAAoB,CAAC,EAAE,OAAO,CAAC;CAC/B;AAED;;;GAGG;AACH,MAAM,MAAM,mBAAmB,GAAG,QAAQ,CAAC,IAAI,CAAC,eAAe,EAAE,eAAe,GAAG,eAAe,GAAG,OAAO,CAAC,CAAC,GAC7G,eAAe,CAAC;AAEjB,wBAAgB,yBAAyB,CAAC,MAAM,EAAE,eAAe,GAAG,mBAAmB,CAatF"}
|
||||
13
node_modules/pdf-parse/dist/pdf-parse/esm/ParseParameters.js
generated
vendored
Normal file
13
node_modules/pdf-parse/dist/pdf-parse/esm/ParseParameters.js
generated
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
/**
 * Fills in the default value for every parsing option that is `null` or `undefined`.
 *
 * The object passed in is completed in place and returned, so callers holding the
 * original reference see the defaults too. A missing (`null`/`undefined`) argument is
 * treated as an empty option set and a fresh object is returned instead of throwing.
 * Explicit falsy values such as `0`, `''` or `false` are kept.
 *
 * Defaults: lineThreshold 4.6, cellThreshold 7, cellSeparator '\t', lineEnforce true,
 * pageJoiner '\n-- page_number of total_number --', imageThreshold 80,
 * imageDataUrl true, imageBuffer true, scale 1.
 *
 * @param params - Options supplied by the caller; may be omitted.
 * @returns The same object (or a new one when none was given) with defaults applied.
 */
export function setDefaultParseParameters(params) {
    // The original read `params?.x` but assigned to `params.x`, which throws a
    // TypeError for a nullish argument; fall back to an empty object instead.
    const target = params ?? {};
    target.lineThreshold = target.lineThreshold ?? 4.6;
    target.cellThreshold = target.cellThreshold ?? 7;
    target.cellSeparator = target.cellSeparator ?? '\t';
    target.lineEnforce = target.lineEnforce ?? true;
    target.pageJoiner = target.pageJoiner ?? '\n-- page_number of total_number --';
    target.imageThreshold = target.imageThreshold ?? 80;
    target.imageDataUrl = target.imageDataUrl ?? true;
    target.imageBuffer = target.imageBuffer ?? true;
    target.scale = target.scale ?? 1;
    return target;
}
|
||||
//# sourceMappingURL=ParseParameters.js.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/ParseParameters.js.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/ParseParameters.js.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"ParseParameters.js","sourceRoot":"","sources":["../../../src/pdf-parse/ParseParameters.ts"],"names":[],"mappings":"AAiJA,MAAM,UAAU,yBAAyB,CAAC,MAAuB;IAChE,MAAM,CAAC,aAAa,GAAG,MAAM,EAAE,aAAa,IAAI,GAAG,CAAC;IACpD,MAAM,CAAC,aAAa,GAAG,MAAM,EAAE,aAAa,IAAI,CAAC,CAAC;IAClD,MAAM,CAAC,aAAa,GAAG,MAAM,EAAE,aAAa,IAAI,IAAI,CAAC;IACrD,MAAM,CAAC,WAAW,GAAG,MAAM,EAAE,WAAW,IAAI,IAAI,CAAC;IACjD,MAAM,CAAC,UAAU,GAAG,MAAM,EAAE,UAAU,IAAI,qCAAqC,CAAC;IAChF,MAAM,CAAC,cAAc,GAAG,MAAM,EAAE,cAAc,IAAI,EAAE,CAAC;IAErD,MAAM,CAAC,YAAY,GAAG,MAAM,EAAE,YAAY,IAAI,IAAI,CAAC;IACnD,MAAM,CAAC,WAAW,GAAG,MAAM,EAAE,WAAW,IAAI,IAAI,CAAC;IACjD,MAAM,CAAC,KAAK,GAAG,MAAM,EAAE,KAAK,IAAI,CAAC,CAAC;IAElC,OAAO,MAA6B,CAAC;AACtC,CAAC"}
|
||||
15
node_modules/pdf-parse/dist/pdf-parse/esm/PathGeometry.d.ts
generated
vendored
Normal file
15
node_modules/pdf-parse/dist/pdf-parse/esm/PathGeometry.d.ts
generated
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
/** Tuple of four numbers; presumably path extents as [minX, minY, maxX, maxY] — TODO confirm against callers. */
export type MinMax = [number, number, number, number];
|
||||
/** Shape categories assigned to a drawn path. */
export declare enum PathGeometry {
    /** No recognised shape. */
    undefined = 0,
    /** Horizontal line. */
    hline = 1,
    /** Vertical line. */
    vline = 2,
    /** Rectangle. */
    rectangle = 3,
}
|
||||
/** Numeric codes for path drawing operations. */
export declare enum DrawOPS {
    moveTo = 0,
    lineTo = 1,
    curveTo = 2,
    closePath = 3,
    rectangle = 4,
}
|
||||
//# sourceMappingURL=PathGeometry.d.ts.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/PathGeometry.d.ts.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/PathGeometry.d.ts.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"PathGeometry.d.ts","sourceRoot":"","sources":["../../../src/pdf-parse/PathGeometry.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,MAAM,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;AAEtD,oBAAY,YAAY;IACvB,SAAS,IAAI;IACb,KAAK,IAAI;IACT,KAAK,IAAI;IACT,SAAS,IAAI;CACb;AAED,oBAAY,OAAO;IAClB,MAAM,IAAI;IACV,MAAM,IAAI;IACV,OAAO,IAAI;IACX,SAAS,IAAI;IACb,SAAS,IAAI;CACb"}
|
||||
16
node_modules/pdf-parse/dist/pdf-parse/esm/PathGeometry.js
generated
vendored
Normal file
16
node_modules/pdf-parse/dist/pdf-parse/esm/PathGeometry.js
generated
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
/**
 * Runtime enum of path shape categories.
 * Maps each name to its number and each number back to its name.
 */
export var PathGeometry;
(function (PathGeometry) {
    // The position in the list is the numeric value of the member.
    ['undefined', 'hline', 'vline', 'rectangle'].forEach((label, value) => {
        PathGeometry[label] = value;
        PathGeometry[value] = label;
    });
})(PathGeometry || (PathGeometry = {}));
|
||||
/**
 * Runtime enum of path drawing operation codes.
 * Maps each name to its number and each number back to its name.
 */
export var DrawOPS;
(function (DrawOPS) {
    // The position in the list is the numeric value of the member.
    ['moveTo', 'lineTo', 'curveTo', 'closePath', 'rectangle'].forEach((label, value) => {
        DrawOPS[label] = value;
        DrawOPS[value] = label;
    });
})(DrawOPS || (DrawOPS = {}));
|
||||
//# sourceMappingURL=PathGeometry.js.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/PathGeometry.js.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/PathGeometry.js.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"PathGeometry.js","sourceRoot":"","sources":["../../../src/pdf-parse/PathGeometry.ts"],"names":[],"mappings":"AAEA,MAAM,CAAN,IAAY,YAKX;AALD,WAAY,YAAY;IACvB,yDAAa,CAAA;IACb,iDAAS,CAAA;IACT,iDAAS,CAAA;IACT,yDAAa,CAAA;AACd,CAAC,EALW,YAAY,KAAZ,YAAY,QAKvB;AAED,MAAM,CAAN,IAAY,OAMX;AAND,WAAY,OAAO;IAClB,yCAAU,CAAA;IACV,yCAAU,CAAA;IACV,2CAAW,CAAA;IACX,+CAAa,CAAA;IACb,+CAAa,CAAA;AACd,CAAC,EANW,OAAO,KAAP,OAAO,QAMlB"}
|
||||
22
node_modules/pdf-parse/dist/pdf-parse/esm/ScreenshotResult.d.ts
generated
vendored
Normal file
22
node_modules/pdf-parse/dist/pdf-parse/esm/ScreenshotResult.d.ts
generated
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
/**
 * @public
 * Screenshot
 * One rendered page as produced by getScreenshot().
 */
export interface Screenshot {
    /** Image bytes; an empty array when no buffer was produced. */
    data: Uint8Array;
    /** Image as a data URL; an empty string when no data URL was produced. */
    dataUrl: string;
    /** 1-based number of the rendered page. */
    pageNumber: number;
    /** Width of the render viewport. */
    width: number;
    /** Height of the render viewport. */
    height: number;
    /** Scale of the render viewport. */
    scale: number;
}
|
||||
/**
 * @public
 * ScreenshotResult
 * Collection of rendered pages plus the page total given at construction.
 */
export declare class ScreenshotResult {
    /** Rendered pages. */
    pages: Screenshot[];
    /** Value passed to the constructor. */
    total: number;
    constructor(total: number);
}
|
||||
//# sourceMappingURL=ScreenshotResult.d.ts.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/ScreenshotResult.d.ts.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/ScreenshotResult.d.ts.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"ScreenshotResult.d.ts","sourceRoot":"","sources":["../../../src/pdf-parse/ScreenshotResult.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,MAAM,WAAW,UAAU;IAE1B,IAAI,EAAE,UAAU,CAAC;IAGjB,OAAO,EAAE,MAAM,CAAC;IAEhB,UAAU,EAAE,MAAM,CAAC;IAEnB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;CACd;AAED;;;GAGG;AACH,qBAAa,gBAAgB;IAC5B,KAAK,EAAE,KAAK,CAAC,UAAU,CAAC,CAAM;IAC9B,KAAK,EAAE,MAAM,CAAK;gBAEN,KAAK,EAAE,MAAM;CAGzB"}
|
||||
12
node_modules/pdf-parse/dist/pdf-parse/esm/ScreenshotResult.js
generated
vendored
Normal file
12
node_modules/pdf-parse/dist/pdf-parse/esm/ScreenshotResult.js
generated
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
/**
 * @public
 * ScreenshotResult
 * Holds the rendered pages together with the total passed to the constructor.
 */
export class ScreenshotResult {
    /** Rendered pages; starts empty. */
    pages = [];
    /** Set from the constructor argument. */
    total;
    constructor(total) {
        this.total = total;
    }
}
|
||||
//# sourceMappingURL=ScreenshotResult.js.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/ScreenshotResult.js.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/ScreenshotResult.js.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"ScreenshotResult.js","sourceRoot":"","sources":["../../../src/pdf-parse/ScreenshotResult.ts"],"names":[],"mappings":"AAkBA;;;GAGG;AACH,MAAM,OAAO,gBAAgB;IAC5B,KAAK,GAAsB,EAAE,CAAC;IAC9B,KAAK,GAAW,CAAC,CAAC;IAElB,YAAY,KAAa;QACxB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACpB,CAAC;CACD"}
|
||||
20
node_modules/pdf-parse/dist/pdf-parse/esm/TableResult.d.ts
generated
vendored
Normal file
20
node_modules/pdf-parse/dist/pdf-parse/esm/TableResult.d.ts
generated
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
/** Two-dimensional array of strings (presumably rows of cell text — confirm). */
export type TableArray = Array<Array<string>>;
|
||||
/**
 * @public
 * PageTableResult: the element type of `TableResult.pages`.
 */
|
||||
export interface PageTableResult {
|
||||
/** Page identifier (presumably the page number — confirm numbering base). */
num: number;
|
||||
/** Tables associated with this entry, each a `TableArray`. */
tables: TableArray[];
|
||||
}
|
||||
/**
 * @public
 * TableResult: container for per-page table entries, a merged table list and a total count.
 */
|
||||
export declare class TableResult {
|
||||
/** Per-page entries; the implementation initialises this to an empty array. */
pages: Array<PageTableResult>;
|
||||
/** Merged tables; initialised to an empty array (how it is filled is not visible here). */
mergedTables: TableArray[];
|
||||
/** Count passed to the constructor (presumably the document's page count — confirm). */
total: number;
|
||||
/** Stores `total` on the instance. */
constructor(total: number);
|
||||
}
|
||||
//# sourceMappingURL=TableResult.d.ts.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/TableResult.d.ts.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/TableResult.d.ts.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"TableResult.d.ts","sourceRoot":"","sources":["../../../src/pdf-parse/TableResult.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;AAE9C;;;GAGG;AACH,MAAM,WAAW,eAAe;IAC/B,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,UAAU,EAAE,CAAC;CACrB;AAED;;;GAGG;AACH,qBAAa,WAAW;IACvB,KAAK,EAAE,KAAK,CAAC,eAAe,CAAC,CAAM;IACnC,YAAY,EAAE,UAAU,EAAE,CAAM;IAChC,KAAK,EAAE,MAAM,CAAK;gBAEN,KAAK,EAAE,MAAM;CAGzB"}
|
||||
13
node_modules/pdf-parse/dist/pdf-parse/esm/TableResult.js
generated
vendored
Normal file
13
node_modules/pdf-parse/dist/pdf-parse/esm/TableResult.js
generated
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
/**
 * @public
 * TableResult: holds per-page table entries, a merged table list and a total count.
 */
export class TableResult {
    /** Per-page entries; starts out empty. */
    pages = new Array();

    /** Merged tables; starts out empty. */
    mergedTables = new Array();

    /** Total count; overwritten by the constructor argument. */
    total = 0;

    /**
     * @param total value stored in `this.total`
     */
    constructor(total) {
        this.total = total;
    }
}
|
||||
//# sourceMappingURL=TableResult.js.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/TableResult.js.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/TableResult.js.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"TableResult.js","sourceRoot":"","sources":["../../../src/pdf-parse/TableResult.ts"],"names":[],"mappings":"AAWA;;;GAGG;AACH,MAAM,OAAO,WAAW;IACvB,KAAK,GAA2B,EAAE,CAAC;IACnC,YAAY,GAAiB,EAAE,CAAC;IAChC,KAAK,GAAW,CAAC,CAAC;IAElB,YAAY,KAAa;QACxB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACpB,CAAC;CACD"}
|
||||
35
node_modules/pdf-parse/dist/pdf-parse/esm/TextResult.d.ts
generated
vendored
Normal file
35
node_modules/pdf-parse/dist/pdf-parse/esm/TextResult.d.ts
generated
vendored
Normal file
@@ -0,0 +1,35 @@
|
||||
/**
 * @public
 * HyperlinkPosition: a rectangle together with a URL, a text and a flag.
 * NOTE(review): this type is not used anywhere in the visible code; the field
 * meanings below are inferred from names — confirm against the text extractor.
 */
|
||||
export type HyperlinkPosition = {
|
||||
/** Bounding rectangle given by its four edges (coordinate system not visible here). */
rect: {
|
||||
left: number;
|
||||
top: number;
|
||||
right: number;
|
||||
bottom: number;
|
||||
};
|
||||
/** Link target (presumably). */
url: string;
|
||||
/** Text associated with the link (presumably). */
text: string;
|
||||
/** Flag; presumably marks whether the link was already consumed — confirm. */
used: boolean;
|
||||
};
|
||||
/**
 * @public
 * PageTextResult: the element type of `TextResult.pages`.
 */
|
||||
export interface PageTextResult {
|
||||
/** Key that `TextResult.getPageText(num)` compares against with `===`. */
num: number;
|
||||
/** Text returned by `TextResult.getPageText` for a matching `num`. */
text: string;
|
||||
}
|
||||
/**
 * @public
 * TextResult: container for per-page text entries, a combined text and a total count.
 */
|
||||
export declare class TextResult {
|
||||
/** Per-page entries; the implementation initialises this to an empty array. */
pages: Array<PageTextResult>;
|
||||
/** Initialised to the empty string (presumably the text of the whole document — confirm). */
text: string;
|
||||
/** Count passed to the constructor (presumably the document's page count — confirm). */
total: number;
|
||||
/** Returns the `text` of the first entry in `pages` whose `num` equals the argument, or '' if none matches. */
getPageText(num: number): string;
|
||||
/** Stores `total` on the instance. */
constructor(total: number);
|
||||
}
|
||||
//# sourceMappingURL=TextResult.d.ts.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/TextResult.d.ts.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/TextResult.d.ts.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"TextResult.d.ts","sourceRoot":"","sources":["../../../src/pdf-parse/TextResult.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,MAAM,MAAM,iBAAiB,GAAG;IAC/B,IAAI,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,GAAG,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;IACnE,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,OAAO,CAAC;CACd,CAAC;AAEF;;;GAGG;AACH,MAAM,WAAW,cAAc;IAC9B,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;CACb;AAED;;;GAGG;AACH,qBAAa,UAAU;IACtB,KAAK,EAAE,KAAK,CAAC,cAAc,CAAC,CAAM;IAClC,IAAI,EAAE,MAAM,CAAM;IAClB,KAAK,EAAE,MAAM,CAAK;IAEX,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM;gBAO3B,KAAK,EAAE,MAAM;CAGzB"}
|
||||
20
node_modules/pdf-parse/dist/pdf-parse/esm/TextResult.js
generated
vendored
Normal file
20
node_modules/pdf-parse/dist/pdf-parse/esm/TextResult.js
generated
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
/**
|
||||
* @public
|
||||
* TextResult
|
||||
*/
|
||||
export class TextResult {
|
||||
pages = [];
|
||||
text = '';
|
||||
total = 0;
|
||||
getPageText(num) {
|
||||
for (const pageData of this.pages) {
|
||||
if (pageData.num === num)
|
||||
return pageData.text;
|
||||
}
|
||||
return '';
|
||||
}
|
||||
constructor(total) {
|
||||
this.total = total;
|
||||
}
|
||||
}
|
||||
//# sourceMappingURL=TextResult.js.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/TextResult.js.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/TextResult.js.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"TextResult.js","sourceRoot":"","sources":["../../../src/pdf-parse/TextResult.ts"],"names":[],"mappings":"AAoBA;;;GAGG;AACH,MAAM,OAAO,UAAU;IACtB,KAAK,GAA0B,EAAE,CAAC;IAClC,IAAI,GAAW,EAAE,CAAC;IAClB,KAAK,GAAW,CAAC,CAAC;IAEX,WAAW,CAAC,GAAW;QAC7B,KAAK,MAAM,QAAQ,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACnC,IAAI,QAAQ,CAAC,GAAG,KAAK,GAAG;gBAAE,OAAO,QAAQ,CAAC,IAAI,CAAC;QAChD,CAAC;QACD,OAAO,EAAE,CAAC;IACX,CAAC;IAED,YAAY,KAAa;QACxB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACpB,CAAC;CACD"}
|
||||
26
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Line.d.ts
generated
vendored
Normal file
26
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Line.d.ts
generated
vendored
Normal file
@@ -0,0 +1,26 @@
|
||||
import { Point } from './Point.js';
|
||||
import { Shape } from './Shape.js';
|
||||
/** Orientation assigned to a `Line`; `None` is the initial value of `Line.direction`. */
export declare enum LineDirection {
|
||||
None = 0,
|
||||
Horizontal = 1,
|
||||
Vertical = 2
|
||||
}
|
||||
/** Line segment between two points, classified as horizontal, vertical or neither. */
export declare class Line extends Shape {
|
||||
/** Start point; for horizontal/vertical lines the implementation orders the ends so this has the smaller x (resp. y). */
from: Point;
|
||||
/** End point; see `from`. */
to: Point;
|
||||
/** Horizontal/Vertical when the ends differ by less than `Shape.tolerance` on y (resp. x); otherwise `None`. */
direction: LineDirection;
|
||||
/** `to - from` along the line's axis; stays 0 when the direction is `None`. */
length: number;
|
||||
/** Points recorded through `addIntersectionPoint`. */
intersections: Array<Point>;
|
||||
/** Lines appended through `addGap`. */
gaps: Array<Line>;
|
||||
/** Stores both points and classifies the line. */
constructor(from: Point, to: Point);
|
||||
private init;
|
||||
private _valid;
|
||||
/** True when the direction is not `None` and `length` exceeds `Shape.tolerance`; the result is cached. */
get valid(): boolean;
|
||||
/** For horizontal/vertical lines, a new Line extended by `Shape.tolerance` at both ends; otherwise this line itself. */
get normalized(): Line;
|
||||
/** Appends `line` to `gaps`. */
addGap(line: Line): void;
|
||||
/** Exact (no tolerance) test that `p` lies on this horizontal/vertical line, ends included; false for `None`. */
containsPoint(p: Point): boolean;
|
||||
/** Appends `point` to `intersections` unless an equal point is already stored. */
addIntersectionPoint(point: Point): void;
|
||||
/** Crossing point of two valid perpendicular lines (also recorded on both lines), or undefined. */
intersection(line: Line): Point | undefined;
|
||||
/** Transforms both end points with `matrix`, re-classifies the line and returns it. */
transform(matrix: Array<number>): this;
|
||||
}
|
||||
//# sourceMappingURL=Line.d.ts.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Line.d.ts.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Line.d.ts.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"Line.d.ts","sourceRoot":"","sources":["../../../../src/pdf-parse/geometry/Line.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AAEnC,oBAAY,aAAa;IACxB,IAAI,IAAI;IACR,UAAU,IAAI;IACd,QAAQ,IAAA;CACR;AAED,qBAAa,IAAK,SAAQ,KAAK;IACvB,IAAI,EAAE,KAAK,CAAC;IACZ,EAAE,EAAE,KAAK,CAAC;IACV,SAAS,EAAE,aAAa,CAAsB;IAC9C,MAAM,EAAE,MAAM,CAAK;IACnB,aAAa,EAAE,KAAK,CAAC,KAAK,CAAC,CAAM;IACjC,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,CAAM;gBAElB,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,KAAK;IAOlC,OAAO,CAAC,IAAI;IA4BZ,OAAO,CAAC,MAAM,CAAkC;IAEhD,IAAI,KAAK,IAAI,OAAO,CAKnB;IAED,IAAI,UAAU,IAAI,IAAI,CAarB;IAEM,MAAM,CAAC,IAAI,EAAE,IAAI,GAAG,IAAI;IAIxB,aAAa,CAAC,CAAC,EAAE,KAAK,GAAG,OAAO;IAoBhC,oBAAoB,CAAC,KAAK,EAAE,KAAK,GAAG,IAAI;IAOxC,YAAY,CAAC,IAAI,EAAE,IAAI,GAAG,KAAK,GAAG,SAAS;IAiD3C,SAAS,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,CAAC,GAAG,IAAI;CAe7C"}
|
||||
146
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Line.js
generated
vendored
Normal file
146
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Line.js
generated
vendored
Normal file
@@ -0,0 +1,146 @@
|
||||
import { Point } from './Point.js';
|
||||
import { Shape } from './Shape.js';
|
||||
/**
 * Orientation of a line. The object maps each name to its number and each
 * number back to its name (None = 0, Horizontal = 1, Vertical = 2).
 */
export var LineDirection;
(function (direction) {
    const labels = ['None', 'Horizontal', 'Vertical'];
    labels.forEach((label, ordinal) => {
        // Forward mapping first, then the reverse lookup entry.
        direction[label] = ordinal;
        direction[ordinal] = label;
    });
})(LineDirection || (LineDirection = {}));
|
||||
/**
 * Line segment between two points, classified as horizontal, vertical or
 * neither (`LineDirection.None`) using `Shape.tolerance`.
 */
export class Line extends Shape {
    /** Start point; for horizontal/vertical lines init() makes this the end with the smaller x (resp. y). */
    from;
    /** End point; see `from`. */
    to;
    /** Classification computed by init(). */
    direction = LineDirection.None;
    /** Extent along the line's axis; 0 while the direction is None. */
    length = 0;
    /** Points recorded by addIntersectionPoint(). */
    intersections = [];
    /** Lines recorded by addGap(). */
    gaps = [];

    /**
     * @param from first end point
     * @param to second end point (init() may snap one of its coordinates onto `from`)
     */
    constructor(from, to) {
        super();
        this.from = from;
        this.to = to;
        this.init();
    }

    /**
     * (Re)computes `direction`, `length` and the end-point order from the
     * current `from` / `to`.
     *
     * All derived state is reset first, including the cached `valid` result,
     * so that calling this again from transform() cannot leave a stale
     * direction, length or validity behind.
     */
    init() {
        this.direction = LineDirection.None;
        this.length = 0;
        this._valid = undefined;
        let from = this.from;
        let to = this.to;
        if (Math.abs(from.y - to.y) < Shape.tolerance) {
            this.direction = LineDirection.Horizontal;
            // Snap onto one y so the segment is exactly horizontal (mutates the `to` point).
            to.y = from.y;
            if (from.x > to.x) {
                const temp = from;
                from = to;
                to = temp;
            }
            this.length = to.x - from.x;
        }
        else if (Math.abs(from.x - to.x) < Shape.tolerance) {
            this.direction = LineDirection.Vertical;
            // Snap onto one x so the segment is exactly vertical (mutates the `to` point).
            to.x = from.x;
            if (from.y > to.y) {
                const temp = from;
                from = to;
                to = temp;
            }
            this.length = to.y - from.y;
        }
        this.from = from;
        this.to = to;
    }

    /** Cached result of `valid`; undefined means "not computed yet". */
    _valid = undefined;

    /**
     * True when the line is horizontal or vertical and longer than
     * `Shape.tolerance`. Computed lazily and cached until the next init().
     */
    get valid() {
        if (this._valid === undefined) {
            this._valid = this.direction !== LineDirection.None && this.length > Shape.tolerance;
        }
        return this._valid;
    }

    /**
     * For a horizontal or vertical line, returns a new Line extended by
     * `Shape.tolerance` beyond both ends; for any other line returns `this`.
     */
    get normalized() {
        if (this.direction === LineDirection.Horizontal) {
            return new Line(new Point(this.from.x - Shape.tolerance, this.from.y), new Point(this.to.x + Shape.tolerance, this.from.y));
        }
        else if (this.direction === LineDirection.Vertical) {
            return new Line(new Point(this.from.x, this.from.y - Shape.tolerance), new Point(this.from.x, this.to.y + Shape.tolerance));
        }
        return this;
    }

    /** Appends `line` to `gaps`. */
    addGap(line) {
        this.gaps.push(line);
    }

    /**
     * Exact (tolerance-free) test whether `p` lies on this line, ends included.
     *
     * @returns false for lines that are neither horizontal nor vertical
     */
    containsPoint(p) {
        if (this.direction === LineDirection.Vertical) {
            return this.from.x === p.x && p.y >= this.from.y && p.y <= this.to.y;
        }
        else if (this.direction === LineDirection.Horizontal) {
            return this.from.y === p.y && p.x >= this.from.x && p.x <= this.to.x;
        }
        return false;
    }

    // // todo implement
    // public containsLine(l:Line):boolean{
    //     if(this.direction === LineDirection.Vertical && l.direction === LineDirection.Vertical){
    //         return this.from.x === l.from.x
    //     }
    //     else if(this.direction === LineDirection.Horizontal && l.direction === LineDirection.Horizontal){
    //         return this.from.y === l.from.y
    //     }
    //     return false
    // }

    /** Records `point` in `intersections` unless an equal point is already stored. */
    addIntersectionPoint(point) {
        for (const intPoint of this.intersections) {
            if (intPoint.equal(point))
                return;
        }
        this.intersections.push(point);
    }

    /**
     * Computes where this line crosses `line`.
     *
     * Only a valid horizontal line paired with a valid vertical line can
     * intersect. Both lines are extended by the tolerance (`normalized`) and
     * the crossing must lie strictly inside both extended ranges. A point
     * that is found is also recorded on both lines.
     *
     * @returns the intersection point, or undefined when there is none
     */
    intersection(line) {
        let result;
        if (!this.valid || !line.valid) {
            return result;
        }
        const thisNormalized = this.normalized;
        const lineNormalized = line.normalized;
        if (this.direction === LineDirection.Horizontal && line.direction === LineDirection.Vertical) {
            const x = lineNormalized.from.x;
            const y = thisNormalized.from.y;
            const isOk = x > thisNormalized.from.x && x < thisNormalized.to.x && y > lineNormalized.from.y && y < lineNormalized.to.y;
            if (isOk) {
                const intPoint = new Point(x, y);
                this.addIntersectionPoint(intPoint);
                line.addIntersectionPoint(intPoint);
                result = intPoint;
            }
        }
        else if (this.direction === LineDirection.Vertical && line.direction === LineDirection.Horizontal) {
            const x = thisNormalized.from.x;
            const y = lineNormalized.from.y;
            const isOk = x > lineNormalized.from.x && x < lineNormalized.to.x && y > thisNormalized.from.y && y < thisNormalized.to.y;
            if (isOk) {
                const intPoint = new Point(x, y);
                this.addIntersectionPoint(intPoint);
                line.addIntersectionPoint(intPoint);
                result = intPoint;
            }
        }
        // if(result){
        //     for (const gapLine of this.gaps) {
        //         if(gapLine.containsPoint(result)) return undefined
        //     }
        //
        //     for (const gapLine of line.gaps) {
        //         if(gapLine.containsPoint(result)) return undefined
        //     }
        // }
        return result;
    }

    /**
     * Transforms both end points with `matrix` (the points are transformed in
     * place), rebuilds `from` / `to` from the minimum corner and the absolute
     * extents of the result, then re-runs init().
     *
     * @returns this line, for chaining
     */
    transform(matrix) {
        const p1 = this.from.transform(matrix);
        const p2 = this.to.transform(matrix);
        const x = Math.min(p1.x, p2.x);
        const y = Math.min(p1.y, p2.y);
        const width = Math.abs(p1.x - p2.x);
        const height = Math.abs(p1.y - p2.y);
        this.from = new Point(x, y);
        this.to = new Point(x + width, y + height);
        this.init();
        return this;
    }
}
|
||||
//# sourceMappingURL=Line.js.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Line.js.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Line.js.map
generated
vendored
Normal file
File diff suppressed because one or more lines are too long
20
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/LineStore.d.ts
generated
vendored
Normal file
20
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/LineStore.d.ts
generated
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
import { Line } from './Line.js';
|
||||
import type { Rectangle } from './Rectangle.js';
|
||||
import { Table } from './Table.js';
|
||||
import type { TableData } from './TableData.js';
|
||||
/** Collects horizontal and vertical lines and groups them into `Table` objects. */
export declare class LineStore {
|
||||
/** Horizontal lines not yet assigned to a table. */
hLines: Array<Line>;
|
||||
/** Vertical lines not yet assigned to a table. */
vLines: Array<Line>;
|
||||
/** Stores `line` in `hLines` or `vLines` by direction; lines that are not `valid` are ignored. */
add(line: Line): void;
|
||||
/** Adds every line returned by `rect.getLines()`. */
addRectangle(rect: Rectangle): void;
|
||||
/** Runs `getTables()` and returns the truthy `toData()` results. */
getTableData(): Array<TableData>;
|
||||
/** Groups all stored lines into tables, emptying the store; returns the normalized tables whose `isValid` is true. */
getTables(): Array<Table>;
|
||||
/** Runs `normalizeHorizontal()` followed by `normalizeVertical()`. */
normalize(): void;
|
||||
/** Sorts `hLines` by y and merges lines whose y lies within `Shape.tolerance` of the first line of their group. */
normalizeHorizontal(): void;
|
||||
/** Sorts `vLines` by x and merges lines whose x lies within `Shape.tolerance` of the first line of their group. */
normalizeVertical(): void;
|
||||
private fillTable;
|
||||
private tryFill;
|
||||
private margeHorizontalLines;
|
||||
private margeVerticalLines;
|
||||
}
|
||||
//# sourceMappingURL=LineStore.d.ts.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/LineStore.d.ts.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/LineStore.d.ts.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"LineStore.d.ts","sourceRoot":"","sources":["../../../../src/pdf-parse/geometry/LineStore.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAiB,MAAM,WAAW,CAAC;AAEhD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAEhD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAEhD,qBAAa,SAAS;IACd,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,CAAM;IACzB,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,CAAM;IAEzB,GAAG,CAAC,IAAI,EAAE,IAAI,GAAG,IAAI;IAUrB,YAAY,CAAC,IAAI,EAAE,SAAS,GAAG,IAAI;IAMnC,YAAY,IAAI,KAAK,CAAC,SAAS,CAAC;IAehC,SAAS,IAAI,KAAK,CAAC,KAAK,CAAC;IAoCzB,SAAS,IAAI,IAAI;IAKjB,mBAAmB;IA0BnB,iBAAiB;IA0BxB,OAAO,CAAC,SAAS;IAoBjB,OAAO,CAAC,OAAO;IAUf,OAAO,CAAC,oBAAoB;IAwC5B,OAAO,CAAC,kBAAkB;CAuC1B"}
|
||||
212
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/LineStore.js
generated
vendored
Normal file
212
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/LineStore.js
generated
vendored
Normal file
@@ -0,0 +1,212 @@
|
||||
import { Line, LineDirection } from './Line.js';
|
||||
import { Point } from './Point.js';
|
||||
import { Shape } from './Shape.js';
|
||||
import { Table } from './Table.js';
|
||||
/**
 * Collects horizontal and vertical lines and groups them into tables.
 */
export class LineStore {
    /** Horizontal lines not yet assigned to a table. */
    hLines = [];
    /** Vertical lines not yet assigned to a table. */
    vLines = [];

    /** Stores a valid line in the list matching its direction; anything else is ignored. */
    add(line) {
        if (!line.valid) {
            return;
        }
        switch (line.direction) {
            case LineDirection.Horizontal:
                this.hLines.push(line);
                break;
            case LineDirection.Vertical:
                this.vLines.push(line);
                break;
            default:
                break;
        }
    }

    /** Adds every line returned by `rect.getLines()`. */
    addRectangle(rect) {
        for (const edge of rect.getLines()) {
            this.add(edge);
        }
    }

    /** Converts every table from getTables() with toData() and keeps the truthy results. */
    getTableData() {
        return this.getTables()
            .map((table) => table.toData())
            .filter((data) => data);
    }

    /**
     * Consumes all stored lines (horizontal first, then vertical) and groups
     * them into tables. Each line either joins an existing table or seeds a
     * new one; after either step the remaining lines are offered to that table.
     *
     * @returns the tables whose `isValid` is true, after normalize() was called on each
     */
    getTables() {
        const tables = [];
        // The lists are re-read on every step because fillTable() replaces them.
        const buckets = [() => this.hLines, () => this.vLines];
        for (const bucket of buckets) {
            while (bucket().length !== 0) {
                const seed = bucket().shift();
                if (!seed || this.tryFill(tables, seed)) {
                    continue;
                }
                const table = new Table(seed);
                this.fillTable(table);
                tables.push(table);
            }
        }
        const validTables = tables.filter((candidate) => candidate.isValid);
        validTables.forEach((table) => table.normalize());
        return validTables;
    }

    /** Normalizes the horizontal lines, then the vertical ones. */
    normalize() {
        this.normalizeHorizontal();
        this.normalizeVertical();
    }

    /**
     * Sorts `hLines` by y, groups lines whose y lies within `Shape.tolerance`
     * of the first line of the group, and replaces `hLines` with the merged
     * lines of every group.
     */
    normalizeHorizontal() {
        this.hLines.sort((a, b) => a.from.y - b.from.y);
        const merged = [];
        let group = [];
        for (const line of this.hLines) {
            const belongsToGroup = group.length === 0 || Math.abs(group[0].from.y - line.from.y) < Shape.tolerance;
            if (belongsToGroup) {
                group.push(line);
                continue;
            }
            merged.push(...this.margeHorizontalLines(group));
            group = [line];
        }
        if (group.length > 0) {
            merged.push(...this.margeHorizontalLines(group));
        }
        this.hLines = merged;
    }

    /**
     * Sorts `vLines` by x, groups lines whose x lies within `Shape.tolerance`
     * of the first line of the group, and replaces `vLines` with the merged
     * lines of every group.
     */
    normalizeVertical() {
        this.vLines.sort((a, b) => a.from.x - b.from.x);
        const merged = [];
        let group = [];
        for (const line of this.vLines) {
            const belongsToGroup = group.length === 0 || Math.abs(group[0].from.x - line.from.x) < Shape.tolerance;
            if (belongsToGroup) {
                group.push(line);
                continue;
            }
            merged.push(...this.margeVerticalLines(group));
            group = [line];
        }
        if (group.length > 0) {
            merged.push(...this.margeVerticalLines(group));
        }
        this.vLines = merged;
    }

    /**
     * Offers every stored line to `table` (vertical lines first, then
     * horizontal) and keeps in the store only the lines the table rejected.
     */
    fillTable(table) {
        const rejectedVertical = this.vLines.filter((line) => !table.add(line));
        const rejectedHorizontal = this.hLines.filter((line) => !table.add(line));
        this.hLines = rejectedHorizontal;
        this.vLines = rejectedVertical;
    }

    /**
     * Tries to add `line` to the first table that accepts it; on success the
     * remaining stored lines are offered to that table as well.
     *
     * @returns true when some table accepted the line
     */
    tryFill(tables, line) {
        const host = tables.find((table) => table.add(line));
        if (host === undefined) {
            return false;
        }
        this.fillTable(host);
        return true;
    }

    /**
     * Merges horizontal lines that share a y into maximal segments: lines are
     * sorted by start x (in place) and a line whose start is less than
     * `Shape.tolerance` past the current segment's end extends that segment.
     *
     * @returns the merged lines, all placed at the y of the first sorted line
     */
    margeHorizontalLines(sameYLines) {
        const result = [];
        sameYLines.sort((a, b) => a.from.x - b.from.x);
        const sameY = sameYLines[0]?.from.y;
        if (sameY === undefined) {
            return result;
        }
        let minX = Number.MAX_SAFE_INTEGER;
        let maxX = Number.MIN_SAFE_INTEGER;
        const emitSegment = () => {
            result.push(new Line(new Point(minX, sameY), new Point(maxX, sameY)));
        };
        for (const line of sameYLines) {
            const touchesSegment = line.from.x - maxX < Shape.tolerance;
            if (!touchesSegment) {
                // Close the running segment (if it is a real one) and start a new one.
                if (maxX > minX) {
                    emitSegment();
                }
                minX = line.from.x;
                maxX = line.to.x;
                continue;
            }
            if (line.from.x < minX) {
                minX = line.from.x;
            }
            if (line.to.x > maxX) {
                maxX = line.to.x;
            }
        }
        // Emit the still-open segment unless it shares an end with the last emitted one.
        const last = result[result.length - 1];
        if (!last || (last.from.x !== minX && last.to.x !== maxX)) {
            emitSegment();
        }
        return result;
    }

    /**
     * Merges vertical lines that share an x into maximal segments: lines are
     * sorted by start y (in place) and a line whose start is less than
     * `Shape.tolerance` past the current segment's end extends that segment.
     *
     * @returns the merged lines, all placed at the x of the first sorted line
     */
    margeVerticalLines(sameXLines) {
        const result = [];
        sameXLines.sort((a, b) => a.from.y - b.from.y);
        const sameX = sameXLines[0]?.from.x;
        if (sameX === undefined) {
            return result;
        }
        let minY = Number.MAX_SAFE_INTEGER;
        let maxY = Number.MIN_SAFE_INTEGER;
        const emitSegment = () => {
            result.push(new Line(new Point(sameX, minY), new Point(sameX, maxY)));
        };
        for (const line of sameXLines) {
            const touchesSegment = line.from.y - maxY < Shape.tolerance;
            if (!touchesSegment) {
                // Close the running segment (if it is a real one) and start a new one.
                if (maxY > minY) {
                    emitSegment();
                }
                minY = line.from.y;
                maxY = line.to.y;
                continue;
            }
            if (line.from.y < minY) {
                minY = line.from.y;
            }
            if (line.to.y > maxY) {
                maxY = line.to.y;
            }
        }
        // Emit the still-open segment unless it shares an end with the last emitted one.
        const last = result[result.length - 1];
        if (!last || (last.from.y !== minY && last.to.y !== maxY)) {
            emitSegment();
        }
        return result;
    }
}
|
||||
//# sourceMappingURL=LineStore.js.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/LineStore.js.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/LineStore.js.map
generated
vendored
Normal file
File diff suppressed because one or more lines are too long
9
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Point.d.ts
generated
vendored
Normal file
9
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Point.d.ts
generated
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
import { Shape } from './Shape.js';
|
||||
/** Mutable 2D point. */
export declare class Point extends Shape {
|
||||
x: number;
|
||||
y: number;
|
||||
/** Stores the two coordinates. */
constructor(x: number, y: number);
|
||||
/** Exact comparison of both coordinates (no tolerance). */
equal(point: Point): boolean;
|
||||
/** Replaces the coordinates with `Shape.applyTransform([x, y], matrix)` in place and returns this point. */
transform(matrix: Array<number>): this;
|
||||
}
|
||||
//# sourceMappingURL=Point.d.ts.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Point.d.ts.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Point.d.ts.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"Point.d.ts","sourceRoot":"","sources":["../../../../src/pdf-parse/geometry/Point.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AAEnC,qBAAa,KAAM,SAAQ,KAAK;IACxB,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;gBAEL,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM;IAMzB,KAAK,CAAC,KAAK,EAAE,KAAK,GAAG,OAAO;IAI5B,SAAS,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,CAAC,GAAG,IAAI;CAM7C"}
|
||||
20
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Point.js
generated
vendored
Normal file
20
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Point.js
generated
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
import { Shape } from './Shape.js';
|
||||
/**
 * Mutable 2D point.
 */
export class Point extends Shape {
    /** Horizontal coordinate. */
    x;
    /** Vertical coordinate. */
    y;

    /**
     * @param x horizontal coordinate
     * @param y vertical coordinate
     */
    constructor(x, y) {
        super();
        this.x = x;
        this.y = y;
    }

    /**
     * Exact coordinate comparison (no tolerance).
     *
     * @returns true when both coordinates are strictly equal
     */
    equal(point) {
        if (point.x !== this.x) {
            return false;
        }
        return point.y === this.y;
    }

    /**
     * Applies `matrix` to this point in place.
     *
     * @returns this point, for chaining
     */
    transform(matrix) {
        const moved = Shape.applyTransform([this.x, this.y], matrix);
        this.x = moved[0];
        this.y = moved[1];
        return this;
    }
}
|
||||
//# sourceMappingURL=Point.js.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Point.js.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Point.js.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"Point.js","sourceRoot":"","sources":["../../../../src/pdf-parse/geometry/Point.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AAEnC,MAAM,OAAO,KAAM,SAAQ,KAAK;IACxB,CAAC,CAAS;IACV,CAAC,CAAS;IAEjB,YAAY,CAAS,EAAE,CAAS;QAC/B,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;QACX,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IACZ,CAAC;IAEM,KAAK,CAAC,KAAY;QACxB,OAAO,KAAK,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC;IACjD,CAAC;IAEM,SAAS,CAAC,MAAqB;QACrC,MAAM,CAAC,GAAG,KAAK,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;QACzD,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACd,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACd,OAAO,IAAI,CAAC;IACb,CAAC;CACD"}
|
||||
13
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Rectangle.d.ts
generated
vendored
Normal file
13
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Rectangle.d.ts
generated
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
import { Line } from './Line.js';
|
||||
import { Point } from './Point.js';
|
||||
import { Shape } from './Shape.js';
|
||||
/** Rectangle described by one corner plus a width and a height. */
export declare class Rectangle extends Shape {
|
||||
/** Corner from which `width` and `height` are measured. */
from: Point;
|
||||
width: number;
|
||||
height: number;
|
||||
/** Stores the corner and the two extents. */
constructor(from: Point, width: number, height: number);
|
||||
/** Opposite corner, `from + (width, height)`; a new Point is created on every access. */
get to(): Point;
|
||||
/** The four edges as Lines, keeping only those whose `valid` is true. */
getLines(): Line[];
|
||||
/** Transforms the two opposite corners and rebuilds the rectangle from the minimum corner and absolute extents; returns this. */
transform(matrix: Array<number>): this;
|
||||
}
|
||||
//# sourceMappingURL=Rectangle.d.ts.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Rectangle.d.ts.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Rectangle.d.ts.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"Rectangle.d.ts","sourceRoot":"","sources":["../../../../src/pdf-parse/geometry/Rectangle.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AAEnC,qBAAa,SAAU,SAAQ,KAAK;IAC5B,IAAI,EAAE,KAAK,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;gBAEV,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM;IAOtD,IAAW,EAAE,IAAI,KAAK,CAErB;IAEM,QAAQ,IAAI,IAAI,EAAE;IAYlB,SAAS,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,CAAC,GAAG,IAAI;CAe7C"}
|
||||
40
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Rectangle.js
generated
vendored
Normal file
40
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Rectangle.js
generated
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
import { Line } from './Line.js';
|
||||
import { Point } from './Point.js';
|
||||
import { Shape } from './Shape.js';
|
||||
/**
 * Rectangle described by one corner plus a width and a height.
 */
export class Rectangle extends Shape {
    /** Corner from which `width` and `height` are measured. */
    from;
    /** Horizontal extent. */
    width;
    /** Vertical extent. */
    height;

    /**
     * @param from corner from which the extents are measured
     * @param width horizontal extent
     * @param height vertical extent
     */
    constructor(from, width, height) {
        super();
        this.from = from;
        this.width = width;
        this.height = height;
    }

    /** Opposite corner, `from + (width, height)`; a new Point on every access. */
    get to() {
        const { from, width, height } = this;
        return new Point(from.x + width, from.y + height);
    }

    /**
     * Builds the four edges of the rectangle and returns the valid ones.
     *
     * The edges are created strictly one after another because constructing a
     * Line may adjust the end point it is given, and the far corner is shared
     * by the last two edges.
     */
    getLines() {
        const far = this.to;
        const edges = [];
        edges.push(new Line(this.from, new Point(far.x, this.from.y)));
        edges.push(new Line(this.from, new Point(this.from.x, far.y)));
        edges.push(new Line(new Point(far.x, this.from.y), far));
        edges.push(new Line(new Point(this.from.x, far.y), far));
        return edges.filter((edge) => edge.valid);
    }

    /**
     * Transforms the two opposite corners with `matrix` and rebuilds the
     * rectangle from the minimum corner and the absolute extents of the result.
     *
     * @returns this rectangle, for chaining
     */
    transform(matrix) {
        const nearCorner = Shape.applyTransform([this.from.x, this.from.y], matrix);
        const farCorner = Shape.applyTransform([this.from.x + this.width, this.from.y + this.height], matrix);
        const left = Math.min(nearCorner[0], farCorner[0]);
        const top = Math.min(nearCorner[1], farCorner[1]);
        const spanX = Math.abs(nearCorner[0] - farCorner[0]);
        const spanY = Math.abs(nearCorner[1] - farCorner[1]);
        this.from = new Point(left, top);
        this.width = spanX;
        this.height = spanY;
        return this;
    }
}
|
||||
//# sourceMappingURL=Rectangle.js.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Rectangle.js.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Rectangle.js.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"Rectangle.js","sourceRoot":"","sources":["../../../../src/pdf-parse/geometry/Rectangle.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AAEnC,MAAM,OAAO,SAAU,SAAQ,KAAK;IAC5B,IAAI,CAAQ;IACZ,KAAK,CAAS;IACd,MAAM,CAAS;IAEtB,YAAY,IAAW,EAAE,KAAa,EAAE,MAAc;QACrD,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACjB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACtB,CAAC;IAED,IAAW,EAAE;QACZ,OAAO,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC;IACvE,CAAC;IAEM,QAAQ;QACd,MAAM,EAAE,GAAG,IAAI,CAAC,EAAE,CAAC;QAEnB,MAAM,KAAK,GAAgB;YAC1B,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,KAAK,CAAC,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACjD,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;YACjD,IAAI,IAAI,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;YAC1C,IAAI,IAAI,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;SAC1C,CAAC;QACF,OAAO,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;IACrC,CAAC;IAEM,SAAS,CAAC,MAAqB;QACrC,MAAM,EAAE,GAAG,KAAK,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;QACpE,MAAM,EAAE,GAAG,KAAK,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC,CAAC;QAE/F,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QACjC,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAEjC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QACtC,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAEvC,IAAI,CAAC,IAAI,GAAG,IAAI,KAAK,CAA
C,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5B,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,OAAO,IAAI,CAAC;IACb,CAAC;CACD"}
|
||||
6
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Shape.d.ts
generated
vendored
Normal file
6
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Shape.d.ts
generated
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
/** Abstract base of the geometry classes (Point, Line, Rectangle). */
export declare abstract class Shape {
|
||||
/** Distance threshold used by the geometry classes when comparing coordinates; the implementation sets it to 2. */
static tolerance: number;
|
||||
/** Applies `matrix` to the shape; the subclasses in this package modify themselves and return `this`. */
abstract transform(matrix: Array<number>): this;
|
||||
/** Returns `[p0*m0 + p1*m2 + m4, p0*m1 + p1*m3 + m5]` for point `p` and six-element matrix `m`. */
static applyTransform(p: Array<number>, m: Array<number>): Array<number>;
|
||||
}
|
||||
//# sourceMappingURL=Shape.d.ts.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Shape.d.ts.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Shape.d.ts.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"Shape.d.ts","sourceRoot":"","sources":["../../../../src/pdf-parse/geometry/Shape.ts"],"names":[],"mappings":"AAAA,8BAAsB,KAAK;IAC1B,MAAM,CAAC,SAAS,SAAK;aACL,SAAS,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,CAAC,GAAG,IAAI;IAEtD,MAAM,CAAC,cAAc,CAAC,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC;CAKxE"}
|
||||
9
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Shape.js
generated
vendored
Normal file
9
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Shape.js
generated
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
/**
 * Base class of the geometry classes; carries the shared tolerance and the
 * point-transform helper.
 */
export class Shape {
    /** Distance threshold used by the geometry classes when comparing coordinates. */
    static tolerance = 2;

    /**
     * Applies a six-element transform matrix `m` to the point `p`.
     *
     * @param p point as `[x, y]`
     * @param m matrix as `[m0, m1, m2, m3, m4, m5]`
     * @returns `[x*m0 + y*m2 + m4, x*m1 + y*m3 + m5]`
     */
    static applyTransform(p, m) {
        const px = p[0];
        const py = p[1];
        return [
            px * m[0] + py * m[2] + m[4],
            px * m[1] + py * m[3] + m[5],
        ];
    }
}
|
||||
//# sourceMappingURL=Shape.js.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Shape.js.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Shape.js.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"Shape.js","sourceRoot":"","sources":["../../../../src/pdf-parse/geometry/Shape.ts"],"names":[],"mappings":"AAAA,MAAM,OAAgB,KAAK;IAC1B,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC;IAGrB,MAAM,CAAC,cAAc,CAAC,CAAgB,EAAE,CAAgB;QACvD,MAAM,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAC5C,MAAM,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAC5C,OAAO,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC;IACjB,CAAC"}
|
||||
24
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Table.d.ts
generated
vendored
Normal file
24
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Table.d.ts
generated
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
import { Line } from './Line.js';
|
||||
import { TableData } from './TableData.js';
|
||||
/**
 * A set of horizontal and vertical lines treated as one table candidate, plus the
 * logic that converts that grid into a `TableData`.
 */
export declare class Table {
|
||||
/** Horizontal lines currently held by the table. */
hLines: Array<Line>;
|
||||
/** Vertical lines currently held by the table. */
vLines: Array<Line>;
|
||||
/** Seeds the table with `line`; a line that is neither horizontal nor vertical is not stored. */
constructor(line: Line);
|
||||
/** True when the table holds more than four lines in total. */
get isValid(): boolean;
|
||||
/** Distinct `from.y` values of the horizontal lines, ascending. */
get rowPivots(): Array<number>;
|
||||
/** Distinct `from.x` values of the vertical lines, ascending. */
get colPivots(): Array<number>;
|
||||
/** Stores `line` and returns true only if it intersects a held line of the other direction. */
add(line: Line): boolean;
|
||||
/** Probes `line` against every held line of the other direction. */
private intersection;
|
||||
/** Pulls every held horizontal line with the same `from.y` as the given line out of `hLines`. */
private getSameHorizontal;
|
||||
/** Pulls every held vertical line with the same `from.x` as the given line out of `vLines`. */
private getSameVertical;
|
||||
/** Merges same-row segments into one line, recording spacing above `Shape.tolerance` as gaps. */
private mergeHorizontalLines;
|
||||
/** Merges same-column segments into one line, recording spacing above `Shape.tolerance` as gaps. */
private mergeVerticalLines;
|
||||
/** Drops lines with fewer than two intersections, sorts the rest, and merges collinear segments. */
normalize(): void;
|
||||
/** True when vertical `line` spans `[y1, y2]` and no recorded gap covers that span; throws if `line` is not vertical or `y1 >= y2`. */
verticalExists(line: Line, y1: number, y2: number): boolean;
|
||||
/** True when horizontal `line` spans `[x1, x2]` and no recorded gap covers that span; throws if `line` is not horizontal or `x1 >= x2`. */
horizontalExists(line: Line, x1: number, x2: number): boolean;
|
||||
/** Index of the first horizontal line, from a start index on, whose x-extent contains a given x; -1 if none. */
private findBottomLineIndex;
|
||||
/** Indexes of the vertical lines that contain a given y and intersect a given top line. */
private findVerticalLineIndexs;
|
||||
/** Builds the cells of one row between two horizontal lines. */
private getRow;
|
||||
/** Builds a `TableData` with one row per pair of consecutive horizontal lines. */
toData(): TableData;
|
||||
}
|
||||
//# sourceMappingURL=Table.d.ts.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Table.d.ts.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Table.d.ts.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"Table.d.ts","sourceRoot":"","sources":["../../../../src/pdf-parse/geometry/Table.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAiB,MAAM,WAAW,CAAC;AAGhD,OAAO,EAAkB,SAAS,EAAiB,MAAM,gBAAgB,CAAC;AAE1E,qBAAa,KAAK;IACV,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,CAAM;IACzB,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,CAAM;gBAEpB,IAAI,EAAE,IAAI;IAQtB,IAAW,OAAO,IAAI,OAAO,CAE5B;IAED,IAAW,SAAS,IAAI,KAAK,CAAC,MAAM,CAAC,CAQpC;IAED,IAAW,SAAS,IAAI,KAAK,CAAC,MAAM,CAAC,CAQpC;IAEM,GAAG,CAAC,IAAI,EAAE,IAAI,GAAG,OAAO;IAgB/B,OAAO,CAAC,YAAY;IAsBpB,OAAO,CAAC,iBAAiB;IAmBzB,OAAO,CAAC,eAAe;IAmBvB,OAAO,CAAC,oBAAoB;IAwB5B,OAAO,CAAC,kBAAkB;IAqBnB,SAAS,IAAI,IAAI;IAkCjB,cAAc,CAAC,IAAI,EAAE,IAAI,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,GAAG,OAAO;IAqB3D,gBAAgB,CAAC,IAAI,EAAE,IAAI,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,GAAG,OAAO;IAqBpE,OAAO,CAAC,mBAAmB;IAU3B,OAAO,CAAC,sBAAsB;IAa9B,OAAO,CAAC,MAAM;IAqCP,MAAM,IAAI,SAAS;CAmB1B"}
|
||||
260
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Table.js
generated
vendored
Normal file
260
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Table.js
generated
vendored
Normal file
@@ -0,0 +1,260 @@
|
||||
import { Line, LineDirection } from './Line.js';
|
||||
import { Point } from './Point.js';
|
||||
import { Shape } from './Shape.js';
|
||||
import { TableData } from './TableData.js';
|
||||
/**
 * One table candidate: the horizontal and vertical ruling lines that cross each
 * other, plus the logic that turns that grid into a `TableData`.
 *
 * Flow as far as this class shows: seed with one line, `add()` further lines
 * that intersect the ones already held, `normalize()` to merge collinear
 * segments, then `toData()` to emit rows of cells.
 */
export class Table {
    /** Horizontal lines currently held. */
    hLines = [];
    /** Vertical lines currently held. */
    vLines = [];

    /**
     * Seeds the table with its first line.
     *
     * @param line - Stored in `hLines` or `vLines` according to its direction;
     *   a line with any other direction is not stored.
     */
    constructor(line) {
        if (line.direction === LineDirection.Horizontal) {
            this.hLines.push(line);
        }
        else if (line.direction === LineDirection.Vertical) {
            this.vLines.push(line);
        }
    }

    /** True when the table holds more than four lines in total. */
    get isValid() {
        return this.hLines.length + this.vLines.length > 4;
    }

    /** Distinct `from.y` values of the horizontal lines, ascending. */
    get rowPivots() {
        const ys = new Set(this.hLines.map((line) => line.from.y));
        return [...ys].sort((a, b) => a - b);
    }

    /** Distinct `from.x` values of the vertical lines, ascending. */
    get colPivots() {
        const xs = new Set(this.vLines.map((line) => line.from.x));
        return [...xs].sort((a, b) => a - b);
    }

    /**
     * Adds `line` to the table if it crosses a held line of the other direction.
     *
     * @param line - Candidate line.
     * @returns True if the line was stored, false otherwise.
     */
    add(line) {
        if (!this.intersection(line)) {
            return false;
        }
        if (line.direction === LineDirection.Horizontal) {
            this.hLines.push(line);
            return true;
        }
        if (line.direction === LineDirection.Vertical) {
            this.vLines.push(line);
            return true;
        }
        return false;
    }

    /**
     * Probes `line` against every held line of the other direction.
     *
     * @param line - Candidate line; an invalid line never intersects.
     * @returns True if at least one probe returned a point.
     */
    intersection(line) {
        if (!line.valid) {
            return false;
        }
        let others;
        if (line.direction === LineDirection.Horizontal) {
            others = this.vLines;
        }
        else if (line.direction === LineDirection.Vertical) {
            others = this.hLines;
        }
        else {
            return false;
        }
        let found = false;
        for (const other of others) {
            // No early exit on purpose: every pair must be probed.
            // NOTE(review): Line.intersection presumably records the crossing on
            // the lines (normalize() later filters on intersections.length) — confirm.
            if (line.intersection(other)) {
                found = true;
            }
        }
        return found;
    }

    /**
     * Removes from `hLines` every line with the same `from.y` as `line`.
     *
     * @param line - Reference line, already taken out of `hLines` by the caller.
     * @returns `line` followed by the matching lines, in their original order.
     */
    getSameHorizontal(line) {
        const same = [line];
        const rest = [];
        // Single pass instead of draining the array with repeated shift().
        for (const hLine of this.hLines) {
            (hLine.from.y === line.from.y ? same : rest).push(hLine);
        }
        this.hLines = rest;
        return same;
    }

    /**
     * Removes from `vLines` every line with the same `from.x` as `line`.
     *
     * @param line - Reference line, already taken out of `vLines` by the caller.
     * @returns `line` followed by the matching lines, in their original order.
     */
    getSameVertical(line) {
        const same = [line];
        const rest = [];
        for (const vLine of this.vLines) {
            (vLine.from.x === line.from.x ? same : rest).push(vLine);
        }
        this.vLines = rest;
        return same;
    }

    /**
     * Merges horizontal segments of one row into a single line.
     *
     * @param lines - Non-empty list of segments; sorted in place by `from.x`.
     * @returns A line from the first segment's start to the last segment's end,
     *   with a gap recorded wherever consecutive segments are further apart
     *   than `Shape.tolerance`.
     */
    mergeHorizontalLines(lines) {
        lines.sort((l1, l2) => l1.from.x - l2.from.x);
        const first = lines[0];
        // NOTE(review): the extent ends at the last segment's `to.x`, which is only
        // the true maximum if segments do not overlap — confirm upstream guarantees that.
        const last = lines[lines.length - 1];
        const merged = new Line(new Point(first.from.x, first.from.y), new Point(last.to.x, first.from.y));
        for (let i = 1; i < lines.length; i++) {
            const prev = lines[i - 1];
            const curr = lines[i];
            if (Math.abs(prev.to.x - curr.from.x) > Shape.tolerance) {
                merged.addGap(new Line(new Point(prev.to.x, prev.from.y), new Point(curr.from.x, curr.from.y)));
            }
        }
        return merged;
    }

    /**
     * Merges vertical segments of one column into a single line.
     *
     * @param lines - Non-empty list of segments; sorted in place by `from.y`.
     * @returns A line from the first segment's start to the last segment's end,
     *   with a gap recorded wherever consecutive segments are further apart
     *   than `Shape.tolerance`.
     */
    mergeVerticalLines(lines) {
        lines.sort((l1, l2) => l1.from.y - l2.from.y);
        const first = lines[0];
        // NOTE(review): same overlap caveat as mergeHorizontalLines.
        const last = lines[lines.length - 1];
        const merged = new Line(new Point(first.from.x, first.from.y), new Point(first.from.x, last.to.y));
        for (let i = 1; i < lines.length; i++) {
            const prev = lines[i - 1];
            const curr = lines[i];
            if (Math.abs(prev.to.y - curr.from.y) > Shape.tolerance) {
                merged.addGap(new Line(new Point(prev.to.x, prev.to.y), new Point(prev.to.x, curr.from.y)));
            }
        }
        return merged;
    }

    /**
     * Cleans up the collected lines: drops lines with fewer than two recorded
     * intersections, sorts the rest (rows by y, columns by x), and merges all
     * segments sharing a row or column into one line each.
     */
    normalize() {
        this.hLines = this.hLines.filter((l) => l.intersections.length > 1);
        this.vLines = this.vLines.filter((l) => l.intersections.length > 1);
        this.hLines.sort((l1, l2) => l1.from.y - l2.from.y);
        this.vLines.sort((l1, l2) => l1.from.x - l2.from.x);

        const mergedRows = [];
        while (this.hLines.length > 0) {
            const seed = this.hLines.shift();
            // getSameHorizontal() also removes the seed's siblings from this.hLines.
            mergedRows.push(this.mergeHorizontalLines(this.getSameHorizontal(seed)));
        }
        this.hLines = mergedRows;

        const mergedCols = [];
        while (this.vLines.length > 0) {
            const seed = this.vLines.shift();
            mergedCols.push(this.mergeVerticalLines(this.getSameVertical(seed)));
        }
        this.vLines = mergedCols;
    }

    /**
     * Tells whether a vertical line is actually drawn across `[y1, y2]`.
     *
     * @param line - Vertical line to test.
     * @param y1 - Lower bound of the span.
     * @param y2 - Upper bound of the span; must be greater than `y1`.
     * @returns True if the line covers the span and no recorded gap covers it.
     * @throws Error if `line` is not vertical or `y1 >= y2`.
     */
    verticalExists(line, y1, y2) {
        if (line.direction !== LineDirection.Vertical) {
            throw new Error('Line is not vertical');
        }
        if (y1 >= y2) {
            throw new Error('y1 must be less than y2');
        }
        if (line.from.y > y1 || line.to.y < y2) {
            return false;
        }
        return !line.gaps.some((gap) => gap.from.y <= y1 && gap.to.y >= y2);
    }

    /**
     * Tells whether a horizontal line is actually drawn across `[x1, x2]`.
     *
     * @param line - Horizontal line to test.
     * @param x1 - Lower bound of the span.
     * @param x2 - Upper bound of the span; must be greater than `x1`.
     * @returns True if the line covers the span and no recorded gap covers it.
     * @throws Error if `line` is not horizontal or `x1 >= x2`.
     */
    horizontalExists(line, x1, x2) {
        if (line.direction !== LineDirection.Horizontal) {
            throw new Error('Line is not horizontal');
        }
        if (x1 >= x2) {
            throw new Error('x1 must be less than x2');
        }
        if (line.from.x > x1 || line.to.x < x2) {
            return false;
        }
        return !line.gaps.some((gap) => gap.from.x <= x1 && gap.to.x >= x2);
    }

    /**
     * Finds the first horizontal line at or after `h2Index` whose x-extent
     * contains `xMiddle`.
     *
     * @returns The index into `hLines`, or -1 when no such line exists.
     */
    findBottomLineIndex(h2Index, xMiddle) {
        for (let i = h2Index; i < this.hLines.length; i++) {
            const hLine = this.hLines[i];
            if (hLine.from.x <= xMiddle && hLine.to.x >= xMiddle) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Collects the indexes of the vertical lines that contain `yMiddle` and
     * intersect `topHLine`.
     *
     * @returns Indexes into `vLines`, ascending.
     */
    findVerticalLineIndexs(topHLine, yMiddle) {
        const result = [];
        for (let i = 0; i < this.vLines.length; i++) {
            const vLine = this.vLines[i];
            if (vLine.from.y <= yMiddle && vLine.to.y >= yMiddle && topHLine.intersection(vLine)) {
                result.push(i);
            }
        }
        return result;
    }

    /**
     * Builds the cells of the row whose top edge is `hLines[h1Index]`.
     *
     * @param h1Index - Index of the row's top horizontal line.
     * @param h2Index - Index from which the bottom line of each cell is searched.
     * @param yMiddle - A y coordinate inside the row, used to pick the vertical
     *   lines that bound its cells.
     * @returns The row's cells, left to right. `colspan` / `rowspan` are set
     *   only when greater than 1. A cell with no closing horizontal line is
     *   omitted.
     */
    getRow(h1Index, h2Index, yMiddle) {
        const tableRow = [];
        const topHLine = this.hLines[h1Index];
        const vLineIndexes = this.findVerticalLineIndexs(topHLine, yMiddle);
        for (let i = 1; i < vLineIndexes.length; i++) {
            const leftVLine = this.vLines[vLineIndexes[i - 1]];
            const rightVLine = this.vLines[vLineIndexes[i]];
            const xMiddle = (leftVLine.from.x + rightVLine.from.x) / 2;
            const bottomHLineIndex = this.findBottomLineIndex(h2Index, xMiddle);
            if (bottomHLineIndex < 0) {
                // No horizontal line closes this cell; previously this dereferenced
                // hLines[-1] and threw. Skip the open cell instead.
                continue;
            }
            const bottomHLine = this.hLines[bottomHLineIndex];
            const colSpan = vLineIndexes[i] - vLineIndexes[i - 1];
            const rowSpan = bottomHLineIndex - h1Index;
            tableRow.push({
                minXY: new Point(leftVLine.from.x, topHLine.from.y),
                maxXY: new Point(rightVLine.from.x, bottomHLine.from.y),
                width: rightVLine.from.x - leftVLine.from.x,
                height: bottomHLine.from.y - topHLine.from.y,
                text: [],
                ...(colSpan > 1 ? { colspan: colSpan } : {}),
                ...(rowSpan > 1 ? { rowspan: rowSpan } : {}),
            });
        }
        return tableRow;
    }

    /**
     * Converts the line grid into a `TableData`.
     *
     * @returns A `TableData` bounded by the first/last row and column pivots,
     *   with one row per pair of consecutive horizontal lines.
     */
    toData() {
        const rowPivots = this.rowPivots;
        const colPivots = this.colPivots;
        const minXY = new Point(colPivots[0], rowPivots[0]);
        const maxXY = new Point(colPivots[colPivots.length - 1], rowPivots[rowPivots.length - 1]);
        const result = new TableData(minXY, maxXY, rowPivots, colPivots);
        for (let h1 = 1; h1 < this.hLines.length; h1++) {
            const yMiddle = (this.hLines[h1 - 1].from.y + this.hLines[h1].from.y) / 2;
            result.rows.push(this.getRow(h1 - 1, h1, yMiddle));
        }
        return result;
    }
}
|
||||
//# sourceMappingURL=Table.js.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Table.js.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/Table.js.map
generated
vendored
Normal file
File diff suppressed because one or more lines are too long
25
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/TableData.d.ts
generated
vendored
Normal file
25
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/TableData.d.ts
generated
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
import type { Point } from './Point.js';
|
||||
/** One cell of an extracted table, as produced by `Table` when it builds a row. */
export type TableCell = {
|
||||
/** Corner built from the left vertical line's x and the top horizontal line's y. */
minXY: Point;
|
||||
/** Corner built from the right vertical line's x and the bottom horizontal line's y. */
maxXY: Point;
|
||||
/** `maxXY.x - minXY.x`. */
width: number;
|
||||
/** `maxXY.y - minXY.y`. */
height: number;
|
||||
/** Number of columns spanned; only set when greater than 1. */
colspan?: number;
|
||||
/** Number of rows spanned; only set when greater than 1. */
rowspan?: number;
|
||||
/** Text fragments of the cell; `TableData.toArray()` joins them without a separator. */
text: Array<string>;
|
||||
};
|
||||
/** One table row: its cells in the order they were produced. */
export type TableRow = Array<TableCell>;
|
||||
/** Cell-level representation of one extracted table. */
export declare class TableData {
|
||||
/** Corner of the table with the smallest x and y. */
minXY: Point;
|
||||
/** Corner of the table with the largest x and y. */
maxXY: Point;
|
||||
/** Rows of the table; starts empty and is filled by the caller. */
rows: Array<TableRow>;
|
||||
/** Row boundary coordinates handed to the constructor; used by `check()`. */
private rowPivots;
|
||||
/** Column boundary coordinates handed to the constructor; used by `check()`. */
private colPivots;
|
||||
/** Stores the bounds and pivots and initialises `rows` to an empty array. */
constructor(minXY: Point, maxXY: Point, rowPivots: Array<number>, colPivots: Array<number>);
|
||||
/** First cell whose bounds (inclusive) contain `(x, y)`, or undefined if the point is outside the table or no cell matches. */
findCell(x: number, y: number): TableCell | undefined;
|
||||
/** Total number of cells across all rows. */
get cellCount(): number;
|
||||
/** Number of rows. */
get rowCount(): number;
|
||||
/** True when the span-weighted cell count equals `(colPivots.length - 1) * (rowPivots.length - 1)`. */
check(): boolean;
|
||||
/** Rows as arrays of cell text, each joined and stripped of leading/trailing whitespace. */
toArray(): string[][];
|
||||
}
|
||||
//# sourceMappingURL=TableData.d.ts.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/TableData.d.ts.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/TableData.d.ts.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"TableData.d.ts","sourceRoot":"","sources":["../../../../src/pdf-parse/geometry/TableData.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AAExC,MAAM,MAAM,SAAS,GAAG;IACvB,KAAK,EAAE,KAAK,CAAC;IACb,KAAK,EAAE,KAAK,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;CACpB,CAAC;AAEF,MAAM,MAAM,QAAQ,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC;AAExC,qBAAa,SAAS;IACd,KAAK,EAAE,KAAK,CAAC;IACb,KAAK,EAAE,KAAK,CAAC;IACb,IAAI,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;IAC7B,OAAO,CAAC,SAAS,CAAgB;IACjC,OAAO,CAAC,SAAS,CAAgB;gBAErB,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,SAAS,EAAE,KAAK,CAAC,MAAM,CAAC,EAAE,SAAS,EAAE,KAAK,CAAC,MAAM,CAAC;IAQnF,QAAQ,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,GAAG,SAAS,GAAG,SAAS;IAc5D,IAAW,SAAS,WAEnB;IAED,IAAW,QAAQ,WAElB;IAEM,KAAK,IAAI,OAAO;IAkChB,OAAO,IAAI,MAAM,EAAE,EAAE;CAc5B"}
|
||||
76
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/TableData.js
generated
vendored
Normal file
76
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/TableData.js
generated
vendored
Normal file
@@ -0,0 +1,76 @@
|
||||
/**
 * Cell-level representation of one extracted table: its bounding corners, the
 * row/column boundary coordinates, and the rows of cells.
 */
export class TableData {
    /** Corner with the smallest x and y. */
    minXY;
    /** Corner with the largest x and y. */
    maxXY;
    /** Rows of cells; filled by the caller after construction. */
    rows;
    /** Row boundary coordinates. */
    rowPivots;
    /** Column boundary coordinates. */
    colPivots;

    /**
     * @param minXY - Corner with the smallest x and y.
     * @param maxXY - Corner with the largest x and y.
     * @param rowPivots - Row boundary coordinates.
     * @param colPivots - Column boundary coordinates.
     */
    constructor(minXY, maxXY, rowPivots, colPivots) {
        this.minXY = minXY;
        this.maxXY = maxXY;
        this.rows = [];
        this.rowPivots = rowPivots;
        this.colPivots = colPivots;
    }

    /**
     * Finds the cell containing a point. All bounds are inclusive, so a point on
     * a shared edge resolves to the first matching cell in row order.
     *
     * @param x - X coordinate of the point.
     * @param y - Y coordinate of the point.
     * @returns The matching cell, or undefined when the point lies outside the
     *   table or inside no cell.
     */
    findCell(x, y) {
        const insideTable = x >= this.minXY.x && y >= this.minXY.y && x <= this.maxXY.x && y <= this.maxXY.y;
        if (!insideTable) {
            return undefined;
        }
        for (const row of this.rows) {
            for (const cell of row) {
                if (cell.minXY.x <= x && cell.minXY.y <= y && cell.maxXY.x >= x && cell.maxXY.y >= y) {
                    return cell;
                }
            }
        }
        return undefined;
    }

    /** Total number of cells across all rows. */
    get cellCount() {
        return this.rows.reduce((acc, row) => acc + row.length, 0);
    }

    /** Number of rows. */
    get rowCount() {
        return this.rows.length;
    }

    /**
     * Consistency check: the cells, weighted by `colspan * rowspan` (each
     * defaulting to 1), must cover exactly the grid implied by the pivots.
     *
     * @returns True when the weighted cell count equals
     *   `(colPivots.length - 1) * (rowPivots.length - 1)`.
     */
    check() {
        const gridCellCount = (this.colPivots.length - 1) * (this.rowPivots.length - 1);
        let coveredCellCount = 0;
        for (const row of this.rows) {
            for (const cell of row) {
                coveredCellCount += (cell.colspan || 1) * (cell.rowspan || 1);
            }
        }
        return coveredCellCount === gridCellCount;
    }

    /**
     * Flattens the table to plain strings.
     *
     * @returns One array per row; each cell's text fragments are joined without
     *   a separator and stripped of leading/trailing whitespace.
     */
    toArray() {
        // trim() removes the same leading/trailing whitespace the former regex pass did.
        return this.rows.map((row) => row.map((cell) => cell.text.join('').trim()));
    }
}
|
||||
//# sourceMappingURL=TableData.js.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/TableData.js.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/TableData.js.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"TableData.js","sourceRoot":"","sources":["../../../../src/pdf-parse/geometry/TableData.ts"],"names":[],"mappings":"AAcA,MAAM,OAAO,SAAS;IACd,KAAK,CAAQ;IACb,KAAK,CAAQ;IACb,IAAI,CAAkB;IACrB,SAAS,CAAgB;IACzB,SAAS,CAAgB;IAEjC,YAAY,KAAY,EAAE,KAAY,EAAE,SAAwB,EAAE,SAAwB;QACzF,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,IAAI,GAAG,EAAE,CAAC;QACf,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC5B,CAAC;IAEM,QAAQ,CAAC,CAAS,EAAE,CAAS;QACnC,IAAI,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;YACtF,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;gBAC7B,KAAK,MAAM,IAAI,IAAI,GAAG,EAAE,CAAC;oBACxB,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC;wBACtF,OAAO,IAAI,CAAC;oBACb,CAAC;gBACF,CAAC;YACF,CAAC;QACF,CAAC;QAED,OAAO,SAAS,CAAC;IAClB,CAAC;IAED,IAAW,SAAS;QACnB,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAC5D,CAAC;IAED,IAAW,QAAQ;QAClB,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC;IACzB,CAAC;IAEM,KAAK;QACX,sCAAsC;QACtC,EAAE;QACF,iCAAiC;QACjC,sBAAsB;QACtB,gCAAgC;QAChC,uCAAuC;QACvC,QAAQ;QACR,+BAA+B;QAC/B,IAAI;QACJ,EAAE;QACF,gDAAgD;QAChD,iDAAiD;QACjD,uBAAuB;QACvB,QAAQ;QACR,IAAI;QAEJ,MAAM,gBAAgB,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACnF,IAAI,YAAY,GAAG,CAAC,CAAC;QAErB,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YAC7B,KAAK,MAAM,IAAI,IAAI,GAAG,EAAE,CAAC;gBACxB,MAAM,KAAK,GAAG,CAAC,IAAI,CAAC,OAAO,IAAI,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,IAAI,CAAC,CAAC,CAAC;gBACxD,YAAY,IAAI,KAAK,CAAC;YACvB,CAAC;QACF,CAAC;QAED,IAAI,gBAAgB,KAAK,YAAY,EAAE,CAAC;YACvC,OAAO,KAAK,CAAC;QACd,CAAC;QAED,OAAO,IAAI,CAAC;IACb,CAAC;IAEM,OAAO;QACb,MAAM,QAAQ,GAAe,EAAE,CAAC;QAChC,KAAK,MAAM,GAAG,
IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YAC7B,MAAM,MAAM,GAAa,EAAE,CAAC;YAC5B,KAAK,MAAM,IAAI,IAAI,GAAG,EAAE,CAAC;gBACxB,IAAI,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBAC9B,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,gBAAgB,EAAE,EAAE,CAAC,CAAC;gBAC1C,IAAI,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;gBACnB,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACnB,CAAC;YACD,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvB,CAAC;QACD,OAAO,QAAQ,CAAC;IACjB,CAAC;CACD"}
|
||||
7
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/index.d.ts
generated
vendored
Normal file
7
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/index.d.ts
generated
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
export { Line, LineDirection } from './Line.js';
|
||||
export { LineStore } from './LineStore.js';
|
||||
export { Point } from './Point.js';
|
||||
export { Rectangle } from './Rectangle.js';
|
||||
export { Shape } from './Shape.js';
|
||||
export { Table } from './Table.js';
|
||||
//# sourceMappingURL=index.d.ts.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/index.d.ts.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/index.d.ts.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/pdf-parse/geometry/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAChD,OAAO,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAC3C,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAC3C,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC"}
|
||||
7
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/index.js
generated
vendored
Normal file
7
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/index.js
generated
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
export { Line, LineDirection } from './Line.js';
|
||||
export { LineStore } from './LineStore.js';
|
||||
export { Point } from './Point.js';
|
||||
export { Rectangle } from './Rectangle.js';
|
||||
export { Shape } from './Shape.js';
|
||||
export { Table } from './Table.js';
|
||||
//# sourceMappingURL=index.js.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/index.js.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/geometry/index.js.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../src/pdf-parse/geometry/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAChD,OAAO,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAC3C,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAC3C,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC"}
|
||||
13
node_modules/pdf-parse/dist/pdf-parse/esm/index.d.ts
generated
vendored
Normal file
13
node_modules/pdf-parse/dist/pdf-parse/esm/index.d.ts
generated
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
import { PDFParse } from './PDFParse.js';
|
||||
export { VerbosityLevel } from 'pdfjs-dist/legacy/build/pdf.mjs';
|
||||
export * from './Exception.js';
|
||||
export * from './geometry/index.js';
|
||||
export type { EmbeddedImage, ImageKindKey, ImageKindValue, ImageResult, PageImages } from './ImageResult.js';
|
||||
export type { DateNode, InfoResult, Metadata, OutlineNode, PageLinkResult } from './InfoResult.js';
|
||||
export type * from './LoadParameters.js';
|
||||
export type * from './ParseParameters.js';
|
||||
export type { Screenshot, ScreenshotResult } from './ScreenshotResult.js';
|
||||
export type { PageTableResult, TableArray, TableResult } from './TableResult.js';
|
||||
export type { PageTextResult, TextResult } from './TextResult.js';
|
||||
export { PDFParse };
|
||||
//# sourceMappingURL=index.d.ts.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/index.d.ts.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/index.d.ts.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/pdf-parse/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAEzC,OAAO,EAAE,cAAc,EAAE,MAAM,iCAAiC,CAAC;AAEjE,cAAc,gBAAgB,CAAC;AAC/B,cAAc,qBAAqB,CAAC;AACpC,YAAY,EAAE,aAAa,EAAE,YAAY,EAAE,cAAc,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAC7G,YAAY,EAAE,QAAQ,EAAE,UAAU,EAAE,QAAQ,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACnG,mBAAmB,qBAAqB,CAAC;AACzC,mBAAmB,sBAAsB,CAAC;AAC1C,YAAY,EAAE,UAAU,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAC1E,YAAY,EAAE,eAAe,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AACjF,YAAY,EAAE,cAAc,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAElE,OAAO,EAAE,QAAQ,EAAE,CAAC"}
|
||||
6
node_modules/pdf-parse/dist/pdf-parse/esm/index.js
generated
vendored
Normal file
6
node_modules/pdf-parse/dist/pdf-parse/esm/index.js
generated
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
import { PDFParse } from './PDFParse.js';
|
||||
export { VerbosityLevel } from 'pdfjs-dist/legacy/build/pdf.mjs';
|
||||
export * from './Exception.js';
|
||||
export * from './geometry/index.js';
|
||||
export { PDFParse };
|
||||
//# sourceMappingURL=index.js.map
|
||||
1
node_modules/pdf-parse/dist/pdf-parse/esm/index.js.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/esm/index.js.map
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/pdf-parse/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAEzC,OAAO,EAAE,cAAc,EAAE,MAAM,iCAAiC,CAAC;AAEjE,cAAc,gBAAgB,CAAC;AAC/B,cAAc,qBAAqB,CAAC;AASpC,OAAO,EAAE,QAAQ,EAAE,CAAC"}
|
||||
28
node_modules/pdf-parse/dist/pdf-parse/esm/pdf.worker.mjs
generated
vendored
Normal file
28
node_modules/pdf-parse/dist/pdf-parse/esm/pdf.worker.mjs
generated
vendored
Normal file
File diff suppressed because one or more lines are too long
898
node_modules/pdf-parse/dist/pdf-parse/web/pdf-parse.es.d.ts
generated
vendored
Normal file
898
node_modules/pdf-parse/dist/pdf-parse/web/pdf-parse.es.d.ts
generated
vendored
Normal file
@@ -0,0 +1,898 @@
|
||||
import type { DocumentInitParameters } from 'pdfjs-dist/types/src/display/api.js';
|
||||
import type { ImageKind } from 'pdfjs-dist/legacy/build/pdf.mjs';
|
||||
import { Metadata } from 'pdfjs-dist/types/src/display/metadata.js';
|
||||
import type { PDFDataRangeTransport } from 'pdfjs-dist/types/src/display/api.js';
|
||||
import type { PDFWorker } from 'pdfjs-dist/types/src/display/api.js';
|
||||
import { VerbosityLevel } from 'pdfjs-dist/legacy/build/pdf.mjs';
|
||||
|
||||
/**
|
||||
* Error used to indicate that an operation was aborted (for example by an AbortSignal).
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
export declare class AbortException extends Error {
|
||||
/**
|
||||
* Create a new AbortException.
|
||||
* @param message - Optional error message.
|
||||
* @param cause - Optional underlying cause.
|
||||
*/
|
||||
constructor(message?: string, cause?: unknown);
|
||||
}
|
||||
|
||||
/**
|
||||
* @public
|
||||
* Consolidated date information gathered from different PDF sources.
|
||||
* The PDF 'Info' dictionary contains CreationDate / ModDate and
|
||||
* the XMP/XAP metadata can contain several timestamps as well. This
|
||||
* structure collects those values (if present) as JavaScript Date objects
|
||||
* or null when the property exists but cannot be parsed.
|
||||
*/
|
||||
export declare type DateNode = {
|
||||
CreationDate?: Date | null;
|
||||
ModDate?: Date | null;
|
||||
XmpCreateDate?: Date | null;
|
||||
XmpModifyDate?: Date | null;
|
||||
XmpMetadataDate?: Date | null;
|
||||
XapCreateDate?: Date | null;
|
||||
XapModifyDate?: Date | null;
|
||||
XapMetadataDate?: Date | null;
|
||||
};
|
||||
|
||||
/**
|
||||
* @public
|
||||
* EmbeddedImage
|
||||
* - Normalized representation of an embedded image extracted from the PDF.
|
||||
* - `data`: Raw image bytes (e.g. PNG/JPEG) as Uint8Array. Use this for file writing or binary processing.
|
||||
* - `dataUrl`: Optional data URL (e.g. "data:image/png;base64,...") for directly embedding in <img> src.
|
||||
* Storing both lets consumers choose the most convenient form; consider omitting one to save memory.
|
||||
* - `name`: Resource name for the image.
|
||||
* - `width` / `height`: Dimensions in pixels.
|
||||
* - `kind`: ImageKindValue indicating the pixel format (e.g. GRAYSCALE_1BPP / RGB_24BPP / RGBA_32BPP).
|
||||
*/
|
||||
export declare interface EmbeddedImage {
|
||||
data: Uint8Array;
|
||||
dataUrl: string;
|
||||
name: string;
|
||||
width: number;
|
||||
height: number;
|
||||
kind: ImageKindValue;
|
||||
}
|
||||
|
||||
/**
|
||||
* Error thrown when the PDF structure/contents are malformed and cannot be parsed.
|
||||
*
|
||||
* This is raised for low-level format problems detected while reading PDF objects.
|
||||
* It covers errors encountered while parsing PDF data.
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
export declare class FormatError extends Error {
|
||||
/**
|
||||
* Create a new FormatError.
|
||||
* @param message - Optional message describing the format problem.
|
||||
* @param cause - Optional underlying cause.
|
||||
*/
|
||||
constructor(message?: string, cause?: unknown);
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalize arbitrary thrown values into an Error instance used by the library.
|
||||
*
|
||||
* Known Error instances with specific names are mapped to the library's
|
||||
* typed exceptions in order to preserve type information and any additional
|
||||
* fields (for example `details`, `status`, etc.). If the value is not an
|
||||
* Error it is converted to a generic Error containing the stringified value.
|
||||
*
|
||||
* @public
|
||||
* @param error - The thrown value to normalize.
|
||||
* @returns An Error instance representing the provided value.
|
||||
*/
|
||||
export declare function getException(error: unknown): Error;
|
||||
|
||||
/**
|
||||
* @public
|
||||
* ImageKindKey
|
||||
* - Represents the keys of the ImageKind enum (e.g. "GRAYSCALE_1BPP", "RGB_24BPP", "RGBA_32BPP").
|
||||
*/
|
||||
export declare type ImageKindKey = keyof typeof ImageKind;
|
||||
|
||||
/**
|
||||
* @public
|
||||
* ImageKindValue
|
||||
* - Represents the numeric values of the ImageKind enum (e.g. 1, 2, 3).
|
||||
*/
|
||||
export declare type ImageKindValue = (typeof ImageKind)[ImageKindKey];
|
||||
|
||||
/**
|
||||
* @public
|
||||
* ImageResult
|
||||
* Helper container for extracted images grouped per page.
|
||||
*/
|
||||
export declare class ImageResult {
|
||||
/** Per-page image groups. */
pages: Array<PageImages>;
|
||||
/** Value passed to the constructor. NOTE(review): presumably the document's page count — confirm. */
total: number;
|
||||
/** Looks up an image by page number and resource name; returns null when there is no match (per the signature). */
getPageImage(num: number, name: string): EmbeddedImage | null;
|
||||
/** @param total - Stored as `total`. */
constructor(total: number);
|
||||
}
|
||||
|
||||
/**
|
||||
* @public
|
||||
* Aggregated information about a PDF document returned by getInfo().
|
||||
* The object contains high-level metadata, outline/bookmark structure,
|
||||
* per-page extracted hyperlinks and utility helpers for parsing dates.
|
||||
*/
|
||||
export declare class InfoResult {
|
||||
total: number;
|
||||
/**
|
||||
* The PDF 'Info' dictionary. Typical fields include title, author, subject,
|
||||
* Creator, Producer and Creation/Modification dates. The exact structure is
|
||||
* determined by the PDF and as returned by PDF.js.
|
||||
*/
|
||||
info?: any;
|
||||
metadata?: Metadata;
|
||||
/**
|
||||
* An array of document fingerprint strings provided by PDF.js. Useful
|
||||
* for caching, de-duplication or identifying a document across runs.
|
||||
*/
|
||||
fingerprints?: Array<string | null>;
|
||||
/**
|
||||
* Permission flags for the document as returned by PDF.js (or null).
|
||||
* These flags indicate capabilities such as printing, copying and
|
||||
* other restrictions imposed by the PDF security settings.
|
||||
*/
|
||||
permission?: number[] | null;
|
||||
/**
|
||||
* Optional document outline (bookmarks). When present this is the
|
||||
* hierarchical navigation structure which viewers use for quick access.
|
||||
*/
|
||||
outline?: Array<OutlineNode> | null;
|
||||
pages: Array<PageLinkResult>;
|
||||
/**
|
||||
* Collects dates from different sources (Info dictionary and XMP/XAP metadata)
|
||||
* and returns them as a DateNode where available. This helps callers compare
|
||||
* and choose the most relevant timestamp (for example a creation date vs XMP date).
|
||||
*/
|
||||
getDateNode(): DateNode;
|
||||
/**
|
||||
* Try to parse an ISO-8601 date string from XMP/XAP metadata. If the
|
||||
* value is falsy or cannot be parsed, undefined is returned to indicate
|
||||
* absence or unparsable input.
|
||||
*/
|
||||
private parseISODateString;
|
||||
constructor(total: number);
|
||||
}
|
||||
|
||||
/**
|
||||
* Error thrown when the parsed data is not a valid PDF document.
|
||||
*
|
||||
* Use this exception to signal that the input cannot be interpreted as a PDF
|
||||
* (corrupt file, invalid header, etc.).
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
export declare class InvalidPDFException extends Error {
|
||||
/**
|
||||
* Create a new InvalidPDFException.
|
||||
* @param message - Optional error message.
|
||||
* @param cause - Optional underlying cause (preserved on modern runtimes).
|
||||
*/
|
||||
constructor(message?: string, cause?: unknown);
|
||||
}
|
||||
|
||||
/**
 * A `Shape` described by two end points, `from` and `to`.
 *
 * Besides the end points it exposes a `direction`, a `length`, and the
 * intersection points and gaps recorded for it.
 */
export declare class Line extends Shape {
    from: Point;
    to: Point;
    direction: LineDirection;
    length: number;
    intersections: Point[];
    gaps: Line[];
    constructor(from: Point, to: Point);
    private init;
    private _valid;
    get valid(): boolean;
    get normalized(): Line;
    addGap(line: Line): void;
    containsPoint(p: Point): boolean;
    addIntersectionPoint(point: Point): void;
    /** Returns a `Point` for the given line, or `undefined` when there is none. */
    intersection(line: Line): Point | undefined;
    transform(matrix: number[]): this;
}
|
||||
|
||||
/**
 * Orientation of a `Line`: no orientation, horizontal, or vertical.
 */
export declare enum LineDirection {
    None = 0,
    Horizontal = 1,
    Vertical = 2
}
|
||||
|
||||
/**
 * Container for horizontal (`hLines`) and vertical (`vLines`) lines, from
 * which `Table` / `TableData` values can be obtained.
 */
export declare class LineStore {
    hLines: Line[];
    vLines: Line[];
    add(line: Line): void;
    addRectangle(rect: Rectangle): void;
    getTableData(): TableData[];
    getTables(): Table[];
    normalize(): void;
    normalizeHorizontal(): void;
    normalizeVertical(): void;
    private fillTable;
    private tryFill;
    // The two names below are spelled "marge" in the declaration; they are kept
    // verbatim because they must match the implementation's member names.
    private margeHorizontalLines;
    private margeVerticalLines;
}
|
||||
|
||||
/**
 * LoadParameters
 * Parameters that control how a PDF is loaded.
 *
 * @public
 */
export declare interface LoadParameters extends DocumentInitParameters {
    /**
     * URL of the PDF.
     * Default: `undefined`.
     */
    url?: URL | string | undefined;
    /**
     * Binary PDF data.
     * Prefer TypedArrays (e.g. `Uint8Array`) for lower memory usage. BASE64-encoded data should
     * first be converted to a binary string with `atob()`.
     * **NOTE**: TypedArrays are generally transferred to the worker thread, which reduces
     * main-thread memory usage but takes ownership of the array.
     * Default: `undefined`.
     */
    data?: string | Array<number> | ArrayBuffer | TypedArray | undefined;
    /**
     * Basic authentication headers.
     * Default: `undefined`.
     */
    httpHeaders?: Object | undefined;
    /**
     * Whether cross-site Access-Control requests are made with credentials
     * (e.g. cookies or auth headers).
     * Default: `false`.
     */
    withCredentials?: boolean | undefined;
    /**
     * Password used to decrypt password-protected PDFs.
     * Default: `undefined`.
     */
    password?: string | undefined;
    /**
     * Length of the PDF file; used for progress reports and range requests.
     * Default: `undefined`.
     */
    length?: number | undefined;
    /**
     * Custom range transport implementation.
     * Default: `undefined`.
     */
    range?: PDFDataRangeTransport | undefined;
    /**
     * Upper bound on the number of bytes fetched per range request.
     * Default: `65536` (`2^16`).
     */
    rangeChunkSize?: number | undefined;
    /**
     * Worker used for loading and parsing PDF data.
     * Default: `undefined`.
     */
    worker?: PDFWorker | undefined;
    /**
     * Logging level; use the constants from `VerbosityLevel`.
     * Default: `undefined`.
     */
    verbosity?: number | undefined;
    /**
     * Base URL of the document, used to resolve relative URLs in annotations and outline items.
     * Default: `undefined`.
     */
    docBaseUrl?: string | undefined;
    /**
     * URL where the predefined Adobe CMaps are located. Include the trailing slash.
     * Default: `undefined`.
     */
    cMapUrl?: string | undefined;
    /**
     * Whether the Adobe CMaps are binary-packed.
     * Default: `true`.
     */
    cMapPacked?: boolean | undefined;
    /**
     * Factory for reading built-in CMap files.
     * Default: `{DOMCMapReaderFactory}`.
     */
    CMapReaderFactory?: Object | undefined;
    /**
     * URL where the predefined ICC profiles are located. Include the trailing slash.
     * Default: `undefined`.
     */
    iccUrl?: string | undefined;
    /**
     * When `true`, non-embedded fonts fall back to system fonts.
     * Default: `true` in browsers, `false` in Node.js (unless `disableFontFace === true`, then always `false`).
     */
    useSystemFonts?: boolean | undefined;
    /**
     * URL for the standard font files. Include the trailing slash.
     * Default: `undefined`.
     */
    standardFontDataUrl?: string | undefined;
    /**
     * Factory for reading standard font files.
     * Default: `{DOMStandardFontDataFactory}`.
     */
    StandardFontDataFactory?: Object | undefined;
    /**
     * URL for the WebAssembly files. Include the trailing slash.
     * Default: `undefined`.
     */
    wasmUrl?: string | undefined;
    /**
     * Factory for reading WASM files.
     * Default: `{DOMWasmFactory}`.
     */
    WasmFactory?: Object | undefined;
    /**
     * Enables `fetch()` in the worker thread for CMap/font/WASM files. When `true`, the factory
     * options are ignored.
     * Default: `true` in browsers, `false` in Node.js.
     */
    useWorkerFetch?: boolean | undefined;
    /**
     * Try to use WebAssembly for better performance (e.g. image decoding).
     * Default: `true`.
     */
    useWasm?: boolean | undefined;
    /**
     * Reject promises (e.g. `getTextContent`) on parse errors rather than recovering partially.
     * Default: `false`.
     */
    stopAtErrors?: boolean | undefined;
    /**
     * Maximum image size in total pixels (`width * height`). `-1` means no limit.
     * Default: `-1`.
     */
    maxImageSize?: number | undefined;
    /**
     * Whether evaluating strings as JS is allowed (for PDF function performance).
     * Default: `true`.
     */
    isEvalSupported?: boolean | undefined;
    /**
     * Whether `OffscreenCanvas` may be used in the worker.
     * Default: `true` in browsers, `false` in Node.js.
     */
    isOffscreenCanvasSupported?: boolean | undefined;
    /**
     * Whether `ImageDecoder` may be used in the worker.
     * Default: `true` in browsers, `false` in Node.js.
     * **NOTE**: Temporarily disabled in Chromium due to bugs:
     * - Crashes with BMP decoder on huge images ([issue 374807001](https://issues.chromium.org/issues/374807001))
     * - Broken JPEGs with custom color profiles ([issue 378869810](https://issues.chromium.org/issues/378869810))
     */
    isImageDecoderSupported?: boolean | undefined;
    /**
     * Used to decide when images are resized (via `OffscreenCanvas`). `-1` selects a slower
     * fallback algorithm.
     * Default: `undefined`.
     */
    canvasMaxAreaInBytes?: number | undefined;
    /**
     * Disable `@font-face`/Font Loading API and use the built-in glyph renderer instead.
     * Default: `false` in browsers, `true` in Node.js.
     */
    disableFontFace?: boolean | undefined;
    /**
     * Include extra (non-rendering) font properties when font data is exported from the worker.
     * Increases memory usage.
     * Default: `false`.
     */
    fontExtraProperties?: boolean | undefined;
    /**
     * Render XFA forms when present.
     * Default: `false`.
     */
    enableXfa?: boolean | undefined;
    /**
     * Explicit document context for creating elements and loading resources. Defaults to the
     * current document.
     * Default: `undefined`.
     */
    ownerDocument?: HTMLDocument | undefined;
    /**
     * Disable range requests when loading the PDF.
     * Default: `false`.
     */
    disableRange?: boolean | undefined;
    /**
     * Disable streaming of PDF data.
     * Default: `false`.
     */
    disableStream?: boolean | undefined;
    /**
     * Disable pre-fetching of PDF data. Needs `disableStream: true` to work fully.
     * Default: `false`.
     */
    disableAutoFetch?: boolean | undefined;
    /**
     * Enable debugging hooks (see `web/debugger.js`).
     * Default: `false`.
     */
    pdfBug?: boolean | undefined;
    /**
     * Factory for creating canvases.
     * Default: `{DOMCanvasFactory}`.
     */
    CanvasFactory?: Object | undefined;
    /**
     * Factory for creating SVG filters during rendering.
     * Default: `{DOMFilterFactory}`.
     */
    FilterFactory?: Object | undefined;
    /**
     * Enable hardware acceleration for rendering.
     * Default: `false`.
     */
    enableHWA?: boolean | undefined;
}
|
||||
|
||||
export { Metadata }
|
||||
|
||||
/**
 * A single entry in the PDF outline (bookmarks).
 * The shape mirrors what PDF.js' `getOutline()` API returns.
 *
 * @public
 */
export declare interface OutlineNode {
    title: string;
    bold: boolean;
    italic: boolean;
    color: Uint8ClampedArray;
    dest: string | Array<any> | null;
    url: string | null;
    unsafeUrl?: string;
    newWindow?: boolean;
    count?: number;
    /**
     * Nested outline entries. Typed recursively (previously `Array<any>`) so
     * that walking the outline tree stays type-checked at every level.
     */
    items: Array<OutlineNode>;
}
|
||||
|
||||
/**
 * PageImages
 * All embedded images found on one PDF page.
 *
 * @public
 */
export declare interface PageImages {
    /** 1-based page index. */
    pageNumber: number;
    /** The `EmbeddedImage` objects found on this page. */
    images: Array<EmbeddedImage>;
}
|
||||
|
||||
/**
 * Link extraction result for a single page.
 *
 * @public
 */
export declare type PageLinkResult = {
    /** Physical, 1-based page index within the PDF document. */
    pageNumber: number;
    /**
     * Printed page label shown by PDF viewers (e.g. "iii", "1", "A-1"). It can differ from the
     * physical page number and may be undefined when the document provides no labels.
     */
    pageLabel?: string | null;
    /** Text-to-URL mappings that were found/overlaid on the page. */
    links: {
        text: string;
        url: string;
    }[];
    /** Page width in PDF units for the viewport used. */
    width: number;
    /** Page height in PDF units for the viewport used. */
    height: number;
};
|
||||
|
||||
/**
 * PageTableResult
 * The tables (`tables`) associated with one page, identified by `num`.
 *
 * @public
 */
export declare interface PageTableResult {
    num: number;
    tables: Array<TableArray>;
}
|
||||
|
||||
/**
 * PageTextResult
 * The text (`text`) associated with one page, identified by `num`.
 *
 * @public
 */
export declare interface PageTextResult {
    text: string;
    num: number;
}
|
||||
|
||||
/**
 * ParseParameters
 * Options that control parsing behavior and output formatting.
 *
 * @public
 */
export declare interface ParseParameters {
    /**
     * Page numbers to parse.
     * When given, only these pages are parsed, and they are returned in the same order.
     * Example: [1, 3, 5]. Parse only one page: [7].
     * Default: `undefined`.
     */
    partial?: number[];
    /**
     * Parse the first N pages (pages 1..N).
     * Ignored when `partial` is provided. When both `first` and `last` are set they define an
     * explicit inclusive page range (first..last) and the "first N" semantics do not apply.
     * Default: `undefined`.
     */
    first?: number;
    /**
     * Parse the last N pages (pages total-N+1..total).
     * Ignored when `partial` is provided. When both `first` and `last` are set they define an
     * explicit inclusive page range (first..last) and the "last N" semantics do not apply.
     * Default: `undefined`.
     */
    last?: number;
    /**
     * Collect per-page metadata such as embedded links, title, pageLabel, and dimensions;
     * ISBN, DOI, abstract, and references are work in progress when getInfo() is used.
     * Default: `false`.
     */
    parsePageInfo?: boolean;
    /**
     * Try to detect and include hyperlink annotations (e.g. URLs) associated with text.
     * Detected links are formatted as Markdown inline links (for example: [text](https://example.com)).
     * Default: `false`.
     */
    parseHyperlinks?: boolean;
    /**
     * Enforce logical line breaks: a newline is inserted when the vertical distance between
     * text items exceeds `lineThreshold`.
     * Helps preserve paragraph/line structure when text items are emitted as separate segments.
     * Default: `true`.
     */
    lineEnforce?: boolean;
    /**
     * Threshold used to decide whether nearby text items belong to different lines.
     * Larger values make the parser more likely to start a new line between items.
     * Default: `4.6`.
     */
    lineThreshold?: number;
    /**
     * String inserted between text items on the same line when a sufficiently large horizontal
     * gap is detected.
     * Typically used to emulate a cell/column separator (for example, "\t" for tabs).
     * Default: `'\t'`.
     */
    cellSeparator?: string;
    /**
     * Horizontal distance threshold deciding when two text items on the same baseline are
     * treated as separate cells.
     * Larger value produces fewer (wider) cells; smaller value creates more cell breaks.
     * Default: `7`.
     */
    cellThreshold?: number;
    /**
     * String appended at the end of each page's extracted text to mark page boundaries.
     * Supports the placeholders `page_number` and `total_number`, which are substituted accordingly.
     * If omitted or empty, no page boundary marker is added.
     * Default: `'\n-- page_number of total_number --'`.
     */
    pageJoiner?: string;
    /**
     * String used to join text items when returning a page's text.
     * When provided it replaces the default empty-string joining behavior.
     * Default: `undefined`.
     */
    itemJoiner?: string;
    /**
     * Minimum image dimension (in pixels) for width or height.
     * When set, images where width OR height are below or equal this value are ignored by `getImage()`.
     * Useful for excluding tiny decorative or tracking images.
     * Default: `80`.
     * Disable: `0`.
     */
    imageThreshold?: number;
    /**
     * Screenshot scale factor: 1 for the original size, 1.5 for a 50% larger image, etc.
     * Default: `1`.
     */
    scale?: number;
    /**
     * Desired screenshot width in pixels.
     * When set, the `scale` option is ignored.
     * Default: `undefined`.
     */
    desiredWidth?: number;
    /**
     * For both getImage() and getScreenshot(): include the image as a base64 data URL string.
     * Default: `true`.
     */
    imageDataUrl?: boolean;
    /**
     * For both getImage() and getScreenshot(): include the image as a binary buffer.
     * Default: `true`.
     */
    imageBuffer?: boolean;
    /**
     * Include marked content items in the items array of TextContent to capture PDF "marked content".
     * Enables tags (MCID, role/props) and structural/accessibility information useful for
     * mapping text ↔ structure.
     * For plain text extraction it is usually false (trade-off: larger output).
     * Default: `false`.
     */
    includeMarkedContent?: boolean;
    /**
     * When true, text normalization is NOT performed in the worker thread.
     * For plain text extraction, normalizing in the worker (false) is usually recommended.
     * Default: `false`.
     */
    disableNormalization?: boolean;
}
|
||||
|
||||
/**
 * Signals that a PDF file requires a password, or that the password
 * supplied for it is incorrect.
 *
 * @public
 */
export declare class PasswordException extends Error {
    /**
     * Construct a PasswordException.
     * @param message - Error message; may be omitted.
     * @param cause - Underlying cause, if any.
     */
    constructor(message?: string, cause?: unknown);
}
|
||||
|
||||
export { PDFDataRangeTransport }
|
||||
|
||||
/**
 * Loads PDF documents and provides helpers for extracting text, images,
 * tables, metadata, and screenshots.
 *
 * @public
 */
export declare class PDFParse {
    private readonly options;
    private doc;
    progress: {
        loaded: number;
        total: number;
    };
    /**
     * Build a parser from `LoadParameters`.
     * Node.js `Buffer` data is converted to `Uint8Array` automatically and a default verbosity
     * level is ensured.
     * @param options - Initialization parameters.
     */
    constructor(options: LoadParameters);
    destroy(): Promise<void>;
    static get isNodeJS(): boolean;
    static setWorker(workerSrc?: string): string;
    /**
     * Load document-level metadata (info, outline, permissions, page labels) and optionally
     * gather per-page link details.
     * @param params - Parse options; set `parsePageInfo` to collect the per-page metadata described in `ParseParameters`.
     * @returns Aggregated document metadata as an `InfoResult`.
     */
    getInfo(params?: ParseParameters): Promise<InfoResult>;
    private getPageLinks;
    /**
     * Extract plain text for each requested page, optionally enriching hyperlinks and enforcing
     * line or cell separators.
     * @param params - Parse options controlling pagination, link handling, and line/cell thresholds.
     * @returns A `TextResult` holding page-wise text and a concatenated document string.
     */
    getText(params?: ParseParameters): Promise<TextResult>;
    private load;
    private shouldParse;
    private getPageText;
    private getHyperlinks;
    /**
     * Extract embedded images from the requested pages.
     *
     * Behavior notes:
     * - Pages are selected according to ParseParameters (partial, first, last).
     * - Images smaller than `params.imageThreshold` (width OR height) are skipped.
     * - The returned ImageResult contains per-page PageImages; each image entry includes:
     *   - data: Uint8Array (present when params.imageBuffer === true)
     *   - dataUrl: string (present when params.imageDataUrl === true)
     *   - width, height, kind, name
     * - Works in both Node.js (canvas.toBuffer) and browser (canvas.toDataURL) environments.
     *
     * @param params - ParseParameters controlling page selection, thresholds and output format.
     * @returns Promise<ImageResult> with the extracted images grouped by page.
     */
    getImage(params?: ParseParameters): Promise<ImageResult>;
    private convertToRGBA;
    private resolveEmbeddedImage;
    /**
     * Render pages to raster screenshots.
     *
     * Behavior notes:
     * - Pages are selected according to ParseParameters (partial, first, last).
     * - Use params.scale for zoom; params.desiredWidth takes precedence when specified.
     * - Each ScreenshotResult page contains:
     *   - data: Uint8Array (when params.imageBuffer === true)
     *   - dataUrl: string (when params.imageDataUrl === true)
     *   - pageNumber, width, height, scale
     * - Works in both Node.js (canvas.toBuffer) and browser (canvas.toDataURL) environments.
     *
     * @param parseParams - ParseParameters controlling page selection and render options.
     * @returns Promise<ScreenshotResult> with the rendered page images.
     */
    getScreenshot(parseParams?: ParseParameters): Promise<ScreenshotResult>;
    /**
     * Detect and extract tables from pages by analysing vector drawing operators, then populate
     * the cells with text.
     *
     * Behavior notes:
     * - Scans operator lists for rectangles/lines that form table grids (uses PathGeometry and LineStore).
     * - Normalizes the detected geometry and matches positioned text to table cells.
     * - Honors ParseParameters for page selection.
     *
     * @param params - ParseParameters controlling which pages to analyse (partial/first/last).
     * @returns Promise<TableResult> containing the tables discovered per page.
     */
    getTable(params?: ParseParameters): Promise<TableResult>;
    private getPathGeometry;
    private getPageTables;
    private fillPageTables;
}
|
||||
|
||||
export { PDFWorker }
|
||||
|
||||
/**
 * A `Shape` consisting of a single `x`/`y` coordinate pair.
 */
export declare class Point extends Shape {
    x: number;
    y: number;
    constructor(x: number, y: number);
    equal(point: Point): boolean;
    transform(matrix: number[]): this;
}
|
||||
|
||||
/**
 * A `Shape` described by an origin point (`from`) plus a `width` and `height`.
 */
export declare class Rectangle extends Shape {
    from: Point;
    width: number;
    height: number;
    constructor(from: Point, width: number, height: number);
    get to(): Point;
    getLines(): Array<Line>;
    transform(matrix: number[]): this;
}
|
||||
|
||||
/**
 * An HTTP/network response error encountered while fetching PDF data.
 *
 * The `status` and `missing` properties mirror values that may be provided
 * by the network layer of the underlying PDF library.
 *
 * @public
 */
export declare class ResponseException extends Error {
    /**
     * Construct a ResponseException.
     * @param message - Error message; may be omitted.
     * @param status - Numeric HTTP/status code, if any.
     * @param missing - Field describing missing resources, if any.
     * @param cause - Underlying cause, if any.
     */
    constructor(message?: string, status?: number, missing?: unknown, cause?: unknown);
}
|
||||
|
||||
/**
 * SafeParseParameters
 * `ParseParameters` in which `lineThreshold`, `cellThreshold` and `scale`
 * are required rather than optional.
 *
 * @public
 */
export declare type SafeParseParameters = ParseParameters & Required<Pick<ParseParameters, 'lineThreshold' | 'cellThreshold' | 'scale'>>;
|
||||
|
||||
/**
 * Screenshot
 * One rendered page image together with its page number, pixel size and scale.
 *
 * @public
 */
export declare interface Screenshot {
    /** Image bytes. */
    data: Uint8Array;
    /** Image as a data URL string. */
    dataUrl: string;
    pageNumber: number;
    width: number;
    height: number;
    scale: number;
}
|
||||
|
||||
/**
 * ScreenshotResult
 * A list of `Screenshot` pages plus a `total` count.
 *
 * @public
 */
export declare class ScreenshotResult {
    pages: Screenshot[];
    total: number;
    constructor(total: number);
}
|
||||
|
||||
/**
 * Produces a `SafeParseParameters` value from the given `ParseParameters`.
 *
 * NOTE(review): presumably fills in defaults for `lineThreshold`, `cellThreshold`
 * and `scale` (the fields `SafeParseParameters` makes required) — confirm against
 * the implementation.
 *
 * @param params - Caller-supplied parse options.
 * @returns The parse options typed as `SafeParseParameters`.
 */
export declare function setDefaultParseParameters(params: ParseParameters): SafeParseParameters;
|
||||
|
||||
/**
 * Abstract base class of the geometry types; every concrete shape must
 * implement `transform`.
 */
export declare abstract class Shape {
    static tolerance: number;
    static applyTransform(p: number[], m: number[]): number[];
    abstract transform(matrix: number[]): this;
}
|
||||
|
||||
/**
 * A table described by its horizontal (`hLines`) and vertical (`vLines`)
 * lines; `toData()` converts it to a `TableData`.
 */
export declare class Table {
    hLines: Line[];
    vLines: Line[];
    constructor(line: Line);
    get isValid(): boolean;
    get rowPivots(): number[];
    get colPivots(): number[];
    add(line: Line): boolean;
    private intersection;
    private getSameHorizontal;
    private getSameVertical;
    private mergeHorizontalLines;
    private mergeVerticalLines;
    normalize(): void;
    verticalExists(line: Line, y1: number, y2: number): boolean;
    horizontalExists(line: Line, x1: number, x2: number): boolean;
    private findBottomLineIndex;
    // Spelled "Indexs" in the declaration; kept verbatim to match the implementation.
    private findVerticalLineIndexs;
    private getRow;
    toData(): TableData;
}
|
||||
|
||||
/** A table represented as an array of rows, each row being an array of strings. */
export declare type TableArray = string[][];
|
||||
|
||||
/**
 * One table cell: its bounding corners (`minXY`, `maxXY`), size, optional
 * column/row span, and the text fragments collected for it.
 */
declare type TableCell = {
    minXY: Point;
    maxXY: Point;
    width: number;
    height: number;
    colspan?: number;
    rowspan?: number;
    text: string[];
};
|
||||
|
||||
/**
 * Table contents organised as `rows` of `TableCell`s, bounded by `minXY`
 * and `maxXY`; `toArray()` yields the table as a two-dimensional string array.
 */
declare class TableData {
    minXY: Point;
    maxXY: Point;
    rows: TableRow[];
    private rowPivots;
    private colPivots;
    constructor(minXY: Point, maxXY: Point, rowPivots: number[], colPivots: number[]);
    /** Returns a `TableCell` for the given coordinates, or `undefined` when there is none. */
    findCell(x: number, y: number): TableCell | undefined;
    get cellCount(): number;
    get rowCount(): number;
    check(): boolean;
    toArray(): Array<Array<string>>;
}
|
||||
|
||||
/**
 * TableResult
 * Per-page table results (`pages`), a `mergedTables` list, and a `total` count.
 *
 * @public
 */
export declare class TableResult {
    pages: PageTableResult[];
    mergedTables: Array<TableArray>;
    total: number;
    constructor(total: number);
}
|
||||
|
||||
/** One table row: an array of `TableCell`s. */
declare type TableRow = TableCell[];
|
||||
|
||||
/**
 * TextResult
 * Per-page text results (`pages`), a document-level `text` string, and a `total` count.
 *
 * @public
 */
export declare class TextResult {
    pages: PageTextResult[];
    text: string;
    total: number;
    constructor(total: number);
    getPageText(num: number): string;
}
|
||||
|
||||
/** Union of the built-in typed-array types accepted as binary data. */
export declare type TypedArray =
    | Int8Array
    | Uint8Array
    | Uint8ClampedArray
    | Int16Array
    | Uint16Array
    | Int32Array
    | Uint32Array
    | Float32Array
    | Float64Array;
|
||||
|
||||
/**
 * Generic wrapper for errors whose cause the library cannot classify.
 *
 * The `details` property may carry additional information supplied by the
 * underlying PDF library.
 *
 * @public
 */
export declare class UnknownErrorException extends Error {
    /**
     * Construct an UnknownErrorException.
     * @param message - Error message; may be omitted.
     * @param details - Additional details from the PDF library, if any.
     * @param cause - Underlying cause, if any.
     */
    constructor(message?: string, details?: unknown, cause?: unknown);
}
|
||||
|
||||
export { VerbosityLevel }
|
||||
|
||||
export { }
|
||||
31974
node_modules/pdf-parse/dist/pdf-parse/web/pdf-parse.es.js
generated
vendored
Normal file
31974
node_modules/pdf-parse/dist/pdf-parse/web/pdf-parse.es.js
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1
node_modules/pdf-parse/dist/pdf-parse/web/pdf-parse.es.js.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/web/pdf-parse.es.js.map
generated
vendored
Normal file
File diff suppressed because one or more lines are too long
2
node_modules/pdf-parse/dist/pdf-parse/web/pdf-parse.umd.js
generated
vendored
Normal file
2
node_modules/pdf-parse/dist/pdf-parse/web/pdf-parse.umd.js
generated
vendored
Normal file
File diff suppressed because one or more lines are too long
1
node_modules/pdf-parse/dist/pdf-parse/web/pdf-parse.umd.js.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/web/pdf-parse.umd.js.map
generated
vendored
Normal file
File diff suppressed because one or more lines are too long
65152
node_modules/pdf-parse/dist/pdf-parse/web/pdf.worker.mjs
generated
vendored
Normal file
65152
node_modules/pdf-parse/dist/pdf-parse/web/pdf.worker.mjs
generated
vendored
Normal file
File diff suppressed because one or more lines are too long
1
node_modules/pdf-parse/dist/pdf-parse/web/pdf.worker.mjs.map
generated
vendored
Normal file
1
node_modules/pdf-parse/dist/pdf-parse/web/pdf.worker.mjs.map
generated
vendored
Normal file
File diff suppressed because one or more lines are too long
105
node_modules/pdf-parse/dist/worker/cjs/index.cjs
generated
vendored
Normal file
105
node_modules/pdf-parse/dist/worker/cjs/index.cjs
generated
vendored
Normal file
File diff suppressed because one or more lines are too long
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user