152 lines
6.5 KiB
JavaScript
152 lines
6.5 KiB
JavaScript
"use strict";
|
||
// Define a non-writable, non-enumerable `__esModule = true` property on the
// CommonJS exports object.
// NOTE(review): presumably emitted by a transpiler for ES-module interop — confirm.
Object.defineProperty(exports, "__esModule", { value: true });
|
||
// Public API of this module. The assignment works before the function body
// appears because function declarations are hoisted.
exports.resolveCompanyAnchors = resolveCompanyAnchors;
|
||
/**
 * Finds contract references: the word "договор" (optionally with one of the
 * case endings а / у / ом / е), an optional number marker (№, # or n) and the
 * contract number, which is exposed as capture group 1.
 *
 * The captured token must contain at least one digit. Because the marker is
 * optional, without this guard any word following "договор" was captured as
 * the number ("договор поставки" -> "поставки", "договоренность" -> "нность").
 *
 * Flags: g (required by matchAll), i (case-insensitive), u (Unicode).
 */
const CONTRACT_PATTERN = /\u0434\u043e\u0433\u043e\u0432\u043e\u0440(?:\u0430|\u0443|ом|е)?\s*(?:№|#|n)?\s*((?=[a-zа-я./_-]*\d)[a-zа-я0-9./_-]+)/giu;
|
||
/**
 * Finds numbered documents: one of the keywords "счет"/"счёт" (optionally
 * "-фактура"/"-фактуры"), "реализация"/"реализации" or "акт", followed by a
 * mandatory number marker (№, # or n) and the document number, which is
 * exposed as capture group 1.
 *
 * The keyword must start a word. JavaScript's \b treats Cyrillic letters as
 * non-word characters, so a Unicode-aware lookbehind is used instead; without
 * it "акт" matched inside "контракт" and "счет" inside "расчет".
 *
 * Only the word forms listed in the pattern are recognised; other case
 * endings (for example "счету № 5") are not matched.
 *
 * Flags: g (required by matchAll), i (case-insensitive), u (Unicode).
 */
const DOCUMENT_NUMBER_PATTERN = /(?<![\p{L}\p{N}_])(?:\u0441\u0447(?:\u0435|\u0451)\u0442(?:-\u0444\u0430\u043a\u0442\u0443\u0440(?:а|ы))?|\u0440\u0435\u0430\u043b\u0438\u0437\u0430\u0446(?:ия|ии)|\u0430\u043a\u0442)\s*(?:№|#|n)\s*([a-zа-я0-9./_-]+)/giu;
|
||
/**
 * Finds dates in two shapes:
 *   - numeric: D.M.Y with "." or "/" separators (1-2 digit day and month,
 *     2-4 digit year), e.g. "05.03.2024";
 *   - day followed by a Russian month name in the genitive case, e.g.
 *     "12 января" (the year is not part of the match).
 *
 * The numeric branch keeps the ASCII \b boundary. The month-name branch ends
 * with a Unicode-aware lookahead instead: \b in JavaScript only knows ASCII
 * word characters, so it never holds between a Cyrillic letter and a space,
 * punctuation or end of text, and "12 января" was never matched.
 *
 * Flags: g (required by matchAll), i (case-insensitive), u (Unicode).
 */
const DATE_PATTERN = /\b(?:\d{1,2}[./]\d{1,2}[./]\d{2,4}\b|\d{1,2}\s+(?:\u044f\u043d\u0432\u0430\u0440\u044f|\u0444\u0435\u0432\u0440\u0430\u043b\u044f|\u043c\u0430\u0440\u0442\u0430|\u0430\u043f\u0440\u0435\u043b\u044f|\u043c\u0430\u044f|\u0438\u044e\u043d\u044f|\u0438\u044e\u043b\u044f|\u0430\u0432\u0433\u0443\u0441\u0442\u0430|\u0441\u0435\u043d\u0442\u044f\u0431\u0440\u044f|\u043e\u043a\u0442\u044f\u0431\u0440\u044f|\u043d\u043e\u044f\u0431\u0440\u044f|\u0434\u0435\u043a\u0430\u0431\u0440\u044f)(?![\p{L}\p{N}_]))/giu;
|
||
/**
 * Finds monetary-looking numbers in two shapes:
 *   - thousands grouped by a space or no-break space, with optional
 *     two-digit decimals: "1 500", "1 500,00";
 *   - plain number with exactly two decimals after "." or ",": "99.90".
 *
 * The plain-decimal branch is guarded so that it does not fire inside dotted
 * dates: it must not be preceded by "<digit>." or "<digit>/" and must not be
 * followed by "." or "/" plus a digit. Previously "12.01.2024" produced the
 * bogus amount "12.01". A standalone "NN.NN" token (for example a short
 * date or a dotted account code) is still indistinguishable from an amount
 * and is matched.
 *
 * Flags: g (required by matchAll), u (Unicode).
 */
const AMOUNT_PATTERN = /\b(?:\d{1,3}(?:[ \u00A0]\d{3})+(?:[.,]\d{2})?\b|(?<!\d[./])\d+[.,]\d{2}\b(?![./]\d))/gu;
|
||
/**
 * Finds an account code introduced by a keyword: "счет"/"счёт" (optionally
 * with the endings а / у / ом / ов), "account" or "schet", then an optional
 * marker (№, # or :) and the code itself. Capture group 1 holds the code:
 * two digits, optionally followed by "." and two more digits.
 *
 * Keyword boundaries are expressed with Unicode-aware lookarounds. The
 * original \b assertions only understand ASCII word characters, so they never
 * held around the Cyrillic keyword in ordinary text and "счет 60.01" was
 * silently ignored.
 *
 * Flags: g (required by matchAll), i (case-insensitive), u (Unicode).
 */
const CONTEXTUAL_ACCOUNT_PATTERN = /(?<![\p{L}\p{N}_])(?:\u0441\u0447(?:\u0435|\u0451)\u0442(?:а|у|ом|ов)?|account|schet)(?![\p{L}\p{N}_])\s*(?:№|#|:)?\s*(\d{2}(?:\.\d{2})?)/giu;
|
||
/**
 * Matches two dotted account codes written as "NN.NN/NN.NN"; whitespace is
 * allowed around the slash. Capture group 1 is the code before the slash,
 * capture group 2 the code after it. Codes without the ".NN" part (for
 * example "60/51") are not matched by this pattern.
 *
 * Flags: g (required by matchAll), u (Unicode).
 */
const ACCOUNT_PAIR_PATTERN = /\b(\d{2}\.\d{2})\s*\/\s*(\d{2}\.\d{2})\b/gu;
|
||
/**
 * Finds reporting periods in two shapes:
 *   - a year 20YY, optionally followed by "-", "." or "/" and a month
 *     number 1-12: "2024", "2024-07";
 *   - a Russian month name in the nominative case followed by a year:
 *     "июль 2024".
 *
 * The month-name branch starts with a Unicode-aware lookbehind. The original
 * leading \b only understands ASCII word characters, so it never held in
 * front of a Cyrillic month name and "июль 2024" degraded to just "2024".
 * Both branches end in a digit, so the trailing \b is reliable.
 *
 * Flags: g (required by matchAll), i (case-insensitive), u (Unicode).
 */
const PERIOD_PATTERN = /(?:\b20\d{2}(?:[-./](?:0?[1-9]|1[0-2]))?|(?<![\p{L}\p{N}_])(?:\u0438\u044e\u043b\u044c|\u0438\u044e\u043d\u044c|\u0430\u0432\u0433\u0443\u0441\u0442|\u0441\u0435\u043d\u0442\u044f\u0431\u0440\u044c|\u043e\u043a\u0442\u044f\u0431\u0440\u044c|\u043d\u043e\u044f\u0431\u0440\u044c|\u0434\u0435\u043a\u0430\u0431\u0440\u044c|\u044f\u043d\u0432\u0430\u0440\u044c|\u0444\u0435\u0432\u0440\u0430\u043b\u044c|\u043c\u0430\u0440\u0442|\u0430\u043f\u0440\u0435\u043b\u044c|\u043c\u0430\u0439)\s+20\d{2})\b/giu;
|
||
/**
 * Keyword detectors for document/operation types. Each entry pairs a type
 * name with a case-insensitive pattern that is tested against the whole text.
 * The patterns carry no g flag, so calling test() on them is stateless.
 *
 * Matching rules:
 *   - Russian keywords are word prefixes (stems): they must start a word and
 *     may be followed by any letters, so inflected forms such as "оплата",
 *     "реализации" or "закрытие" are recognised.
 *   - English keywords must match as whole words.
 *
 * Boundaries use Unicode-aware lookarounds. The original patterns wrapped the
 * stems in \b, which only understands ASCII word characters in JavaScript;
 * as a result the Russian keywords were effectively never detected in
 * ordinary text.
 */
const DOCUMENT_TYPE_PATTERNS = [
    // "счет-фактур…" / "счёт-фактур…", invoice
    { name: "invoice", pattern: /(?<![\p{L}\p{N}_])(?:\u0441\u0447(?:\u0435|\u0451)\u0442-\u0444\u0430\u043a\u0442\u0443\u0440\p{L}*|invoice)(?![\p{L}\p{N}_])/iu },
    // "реализац…", realization
    { name: "realization", pattern: /(?<![\p{L}\p{N}_])(?:\u0440\u0435\u0430\u043b\u0438\u0437\u0430\u0446\p{L}*|realization)(?![\p{L}\p{N}_])/iu },
    // "оплат…", "платеж…", payment
    { name: "payment", pattern: /(?<![\p{L}\p{N}_])(?:(?:\u043e\u043f\u043b\u0430\u0442|\u043f\u043b\u0430\u0442\u0435\u0436)\p{L}*|payment)(?![\p{L}\p{N}_])/iu },
    // "поступлен…", receipt
    { name: "receipt", pattern: /(?<![\p{L}\p{N}_])(?:\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d\p{L}*|receipt)(?![\p{L}\p{N}_])/iu },
    // "закрыти…", "регламент…"
    { name: "close", pattern: /(?<![\p{L}\p{N}_])(?:\u0437\u0430\u043a\u0440\u044b\u0442\u0438|\u0440\u0435\u0433\u043b\u0430\u043c\u0435\u043d\u0442)\p{L}*(?![\p{L}\p{N}_])/iu },
    // "рбп…", "списание…"
    { name: "rbp_writeoff", pattern: /(?<![\p{L}\p{N}_])(?:\u0440\u0431\u043f|\u0441\u043f\u0438\u0441\u0430\u043d\u0438\u0435)\p{L}*(?![\p{L}\p{N}_])/iu },
    // "амортиз…", amortization
    { name: "amortization", pattern: /(?<![\p{L}\p{N}_])(?:\u0430\u043c\u043e\u0440\u0442\u0438\u0437\p{L}*|amortization)(?![\p{L}\p{N}_])/iu }
];
|
||
/**
 * Allow-list of two-digit account prefixes. A candidate account code is
 * accepted only when its first two digits are a member of this set.
 * NOTE(review): the codes look like chart-of-accounts numbers — confirm the
 * list against the chart actually in use before extending it.
 */
const KNOWN_ACCOUNT_PREFIXES = new Set([
    "01", "02", "07", "08",
    "10", "13", "19",
    "20", "21", "23", "25", "26",
    "41", "43", "44", "45",
    "50", "51", "52", "55", "57", "58",
    "60", "62", "66", "67", "68", "69",
    "70", "71", "73", "76",
    "90", "91", "94", "96", "97"
]);
|
||
/**
 * Converts every value to a trimmed string, drops empty results, removes
 * duplicates (first occurrence wins, original order kept) and returns at
 * most `limit` entries.
 *
 * @param {Array<*>} values - Values to normalise; null/undefined count as "".
 * @param {number} [limit=48] - Maximum number of entries in the result.
 * @returns {string[]} De-duplicated, trimmed, non-empty strings.
 */
function uniqueStrings(values, limit = 48) {
    const seen = new Set();
    values.forEach((raw) => {
        const text = String(raw ?? "").trim();
        if (text !== "") {
            seen.add(text);
        }
    });
    return [...seen].slice(0, limit);
}
|
||
/**
 * Normalises a raw token: coerces it to a string (null/undefined become ""),
 * collapses every run of whitespace into a single space and trims both ends.
 *
 * @param {*} value - Raw token.
 * @returns {string} Normalised token, possibly empty.
 */
function normalizeAnchorToken(value) {
    const raw = String(value ?? "");
    const collapsed = raw.replace(/\s+/g, " ");
    return collapsed.trim();
}
|
||
/**
 * Runs a global pattern over the text and returns the distinct, normalised
 * tokens it produces.
 *
 * When `useCaptures` is true and the pattern defines capture groups, every
 * non-empty group of each match contributes a token and the full match is
 * ignored. Otherwise the full match text is the token.
 *
 * @param {string} text - Text to scan.
 * @param {RegExp} pattern - Pattern with the g flag (matchAll throws a
 *   TypeError for non-global patterns).
 * @param {boolean} [useCaptures=true] - Prefer capture groups over the full match.
 * @returns {string[]} Unique tokens in order of first appearance, capped by
 *   the default limit of uniqueStrings.
 */
function collectMatches(text, pattern, useCaptures = true) {
    // matchAll starts from the pattern's current lastIndex, and the patterns
    // are shared module-level objects, so always rewind first.
    pattern.lastIndex = 0;
    const found = [];
    for (const hit of text.matchAll(pattern)) {
        const fromGroups = useCaptures && hit.length > 1;
        const candidates = fromGroups ? hit.slice(1) : [hit[0]];
        for (const candidate of candidates) {
            const cleaned = normalizeAnchorToken(candidate ?? "");
            if (cleaned) {
                found.push(cleaned);
            }
        }
    }
    return uniqueStrings(found);
}
|
||
/**
 * Tells whether a candidate account code starts with two digits that are
 * listed in KNOWN_ACCOUNT_PREFIXES.
 *
 * @param {*} value - Candidate code; it is coerced to a string and trimmed.
 * @returns {boolean} True when the leading two digits are an allowed prefix.
 */
function isKnownAccount(value) {
    const candidate = String(value ?? "").trim();
    const leadingDigits = /^(\d{2})/.exec(candidate);
    return leadingDigits !== null && KNOWN_ACCOUNT_PREFIXES.has(leadingDigits[1]);
}
|
||
/**
 * Collects account codes mentioned in the text from two sources, in this
 * order: codes introduced by a keyword (CONTEXTUAL_ACCOUNT_PATTERN) and both
 * sides of "NN.NN/NN.NN" pairs (ACCOUNT_PAIR_PATTERN). Only codes accepted by
 * isKnownAccount are kept.
 *
 * @param {string} text - Text to scan.
 * @returns {string[]} Up to 24 distinct account codes in order of discovery.
 */
function collectAccountAnchors(text) {
    const contextual = collectMatches(text, CONTEXTUAL_ACCOUNT_PATTERN, true);
    const accepted = new Set(contextual.filter((candidate) => isKnownAccount(candidate)));
    // Shared global regex: rewind before iterating with matchAll.
    ACCOUNT_PAIR_PATTERN.lastIndex = 0;
    for (const pair of text.matchAll(ACCOUNT_PAIR_PATTERN)) {
        for (const side of [pair[1], pair[2]]) {
            const account = normalizeAnchorToken(side ?? "");
            if (account && isKnownAccount(account)) {
                accepted.add(account);
            }
        }
    }
    return [...accepted].slice(0, 24);
}
|
||
/**
 * Returns the names of all DOCUMENT_TYPE_PATTERNS entries whose pattern
 * matches the text, in table order.
 *
 * @param {string} text - Text to scan.
 * @returns {string[]} Up to 12 distinct document type names.
 */
function collectDocumentTypeAnchors(text) {
    const detected = [];
    for (const { name, pattern } of DOCUMENT_TYPE_PATTERNS) {
        if (pattern.test(text)) {
            detected.push(name);
        }
    }
    return uniqueStrings(detected, 12);
}
|
||
/**
 * Merges all anchor categories into one de-duplicated list. Contract
 * numbers, document numbers, dates and amounts are included as they are;
 * accounts, periods and document types are prefixed with "account:",
 * "period:" and "doc_type:" respectively.
 *
 * @param {object} input - Object with the array properties contract_numbers,
 *   document_numbers, dates, amounts, accounts, periods and document_types.
 * @returns {string[]} Up to 64 distinct anchors, in the category order above.
 */
function flattenAnchors(input) {
    const {
        contract_numbers: contracts,
        document_numbers: documents,
        dates,
        amounts,
        accounts,
        periods,
        document_types: documentTypes
    } = input;
    const accountTags = accounts.map((item) => `account:${item}`);
    const periodTags = periods.map((item) => `period:${item}`);
    const typeTags = documentTypes.map((item) => `doc_type:${item}`);
    const merged = [
        ...contracts,
        ...documents,
        ...dates,
        ...amounts,
        ...accountTags,
        ...periodTags,
        ...typeTags
    ];
    return uniqueStrings(merged, 64);
}
|
||
/**
 * Extracts structured anchors from free-form text.
 *
 * @param {*} input - Text to analyse; coerced to a string, null/undefined
 *   are treated as "".
 * @returns {{contract_numbers: string[], document_numbers: string[],
 *   dates: string[], amounts: string[], accounts: string[],
 *   periods: string[], document_types: string[], all: string[]}}
 *   Per-category anchors (each list de-duplicated and capped) plus `all`,
 *   the flattened combination produced by flattenAnchors. Contract and
 *   document numbers are returned with a "договор № " / "документ № " label
 *   in front of the extracted number.
 */
function resolveCompanyAnchors(input) {
    const text = String(input ?? "");

    const contractLabels = [];
    for (const number of collectMatches(text, CONTRACT_PATTERN, true)) {
        contractLabels.push(`\u0434\u043e\u0433\u043e\u0432\u043e\u0440 № ${number}`);
    }

    const documentLabels = [];
    for (const number of collectMatches(text, DOCUMENT_NUMBER_PATTERN, true)) {
        documentLabels.push(`\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442 № ${number}`);
    }

    // Key order is part of the returned shape; keep it stable.
    const anchors = {
        contract_numbers: uniqueStrings(contractLabels, 12),
        document_numbers: uniqueStrings(documentLabels, 16),
        dates: uniqueStrings(collectMatches(text, DATE_PATTERN, false), 16),
        amounts: uniqueStrings(collectMatches(text, AMOUNT_PATTERN, false), 16),
        accounts: uniqueStrings(collectAccountAnchors(text), 24),
        periods: uniqueStrings(collectMatches(text, PERIOD_PATTERN, false), 12),
        document_types: collectDocumentTypeAnchors(text)
    };

    return { ...anchors, all: flattenAnchors(anchors) };
}
|