NODEDC_1C/llm_normalizer/backend/dist/services/addressIntentResolver.js

556 lines
18 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"use strict";
// Flag the CommonJS exports object as an ES-module interop target and expose
// the module's single public entry point. The assignment may precede the
// definition because function declarations are hoisted.
Object.defineProperty(exports, "__esModule", { value: true });
exports.resolveAddressIntent = resolveAddressIntent;
/**
 * Substrings that mark a "who owes us" (receivables) request.
 * Entries are lower-case and are matched by plain substring search (see hasAny),
 * so word stems also cover inflected forms.
 */
const RECEIVABLES_STRONG = [
    "кто должен нам", "нам должны",
    "who owes us",
    "receivable", "receivables",
    "debtor", "debtors",
    "дебитор", "дебиторск"
];
/**
 * Substrings that mark a "whom do we owe" (payables) request.
 * Entries are lower-case and are matched by plain substring search (see hasAny).
 */
const PAYABLES_STRONG = [
    "кому должны мы", "мы должны",
    "who we owe",
    "payable", "payables",
    "creditor", "creditors",
    "кредитор", "кредиторск"
];
/**
 * Substrings that on their own are treated as an account-balance request
 * (checked first in hasAccountBalanceSignal, via substring search).
 */
const ACCOUNT_BALANCE_HINTS = [
    "account balance", "balance by account", "saldo",
    "баланс",
    "остаток по счет", "сальдо по счет", "по счету",
    "что на счете", "что на счёте",
    "на конец"
];
/**
 * Full phrases asking which documents make up a balance.
 * A hit here is accepted by hasDocumentsFormingBalanceSignal without further checks.
 */
const DOCUMENTS_FORMING_BALANCE_HINTS = [
    "documents forming balance", "docs forming balance",
    "documents form balance", "docs form balance",
    "balance documents", "documents for balance",
    "which documents form balance",
    "из чего состоит остаток",
    "какие документы формируют остаток",
    "раскрой остаток по документам",
    "документы под остатком"
];
/**
 * Substrings used by the last-resort "open contracts" rule in resolveAddressIntent.
 * Note that the list contains the bare stem "договор" itself.
 */
const OPEN_CONTRACTS_HINTS = [
    "open contracts", "unclosed contracts",
    "незакрыт", "не закрыт", "открыт",
    "договор"
];
/**
 * Substrings (mostly word stems) that signal an "open items / outstanding debt" request.
 * Matched by substring search (see hasAny).
 */
const OPEN_ITEMS_HINTS = [
    "open items", "unclosed items",
    "хвост", "висят",
    "незакрыт", "открыт",
    "долг", "задолж", "позици"
];
/**
 * Substrings that signal a "documents for a counterparty" request.
 * Matched by substring search (see hasAny); resolveAddressIntent additionally
 * requires some counterparty anchor before accepting the intent.
 */
const DOCUMENTS_BY_COUNTERPARTY_HINTS = [
    "documents by counterparty", "docs by counterparty",
    "documents by company", "documents by supplier",
    "documents by customer", "documents by client", "documents by partner",
    "show documents by counterparty", "list documents by counterparty",
    "документы по", "доступные документы", "список документов",
    "документ",
    "доки", "доки по", "док по",
    "по контрагент"
];
/**
 * Substrings that signal a "bank operations for a counterparty" request.
 * Matched by substring search (see hasAny); the Russian entries are word stems.
 */
const BANK_OPERATIONS_BY_COUNTERPARTY_HINTS = [
    "bank operations by counterparty", "bank payments by counterparty",
    "payment orders by counterparty",
    "bank operations by company", "bank operations by supplier",
    "bank operations by customer",
    "show bank operations by counterparty",
    "bank ops", "bank oper",
    "transactions by counterparty",
    "транзак",
    "банк", "банков", "по банку",
    "опер", "выписк",
    "платеж", "платёж", "оплат",
    "списан", "списани",
    "поступлен", "поступлени",
    "движени"
];
/**
 * Substrings that signal a "documents for a contract" request.
 * Matched by substring search (see hasAny).
 */
const DOCUMENTS_BY_CONTRACT_HINTS = [
    "documents by contract", "docs by contract",
    "show documents by contract", "list documents by contract",
    "документы по договору", "доки по договору", "док по договору",
    "документы договор",
    "договор"
];
/**
 * Full phrases that signal a "bank operations for a contract" request.
 * Matched by substring search (see hasAny).
 */
const BANK_OPERATIONS_BY_CONTRACT_HINTS = [
    "bank operations by contract", "bank payments by contract",
    "payment orders by contract", "transactions by contract",
    "bank ops by contract",
    "банковские операции по договору",
    "платежи по договору",
    "выписка по договору"
];
/**
 * Generic bank-operation vocabulary (Russian stems first, then English words).
 * Used by hasBankOperationSignal through substring search.
 */
const BANK_OPERATION_CORE_HINTS = [
    "банк", "банков",
    "операц", "опер",
    "выписк",
    "платеж", "платёж", "оплат",
    "списан", "поступлен", "движени", "транзак",
    "bank",
    "payment", "payments",
    "transaction", "transactions",
    "statement", "wire"
];
/**
 * Report whether `text` contains at least one of the given substrings.
 *
 * @param {string} text - Text to search in.
 * @param {string[]} patterns - Candidate substrings.
 * @returns {boolean} True as soon as one substring is found; false for an empty list.
 */
function hasAny(text, patterns) {
    for (const needle of patterns) {
        if (text.includes(needle)) {
            return true;
        }
    }
    return false;
}
/**
 * Detect a compact "account.subaccount" code such as "60.01" or "60,1".
 *
 * A bare two-digit number ("62") is deliberately NOT matched: the separator and
 * sub-account part are required. Date fragments are rejected:
 *  - hyphenated dates ("2024-01-02") via the digit/hyphen guards on both sides;
 *  - dotted dates with a four-digit year ("01.02.2024", "2024.01.02") via the
 *    year guards, which previously let "01.02" through as an account code.
 * Three-level codes such as "68.04.1" still match on their first two levels.
 * Dates written with a two-digit year ("01.02.24") remain ambiguous and still match.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} True when an account-like token is present.
 */
function hasCompactAccountCodeToken(text) {
    // Left guard: not glued to a digit/hyphen and not preceded by "YYYY." / "YYYY,".
    // Right guard: not followed by a digit/hyphen and not followed by ".YYYY" / ",YYYY".
    const accountCodePattern = /(?<![\d-]|\d{4}[.,])\d{2}[.,]\d{1,2}(?![\d-]|[.,]\d{4}(?!\d))/u;
    return accountCodePattern.test(text);
}
/**
 * Decide whether the text asks which documents make up an account balance.
 *
 * A listed phrase from DOCUMENTS_FORMING_BALANCE_HINTS is accepted immediately.
 * Otherwise the text must mention both a balance ("остат") and an account
 * (word or code), plus one of:
 *  - an "из чего состоит" / "из чего остат…" question, or
 *  - a document word combined with a "forming"/"expand" verb stem.
 *
 * @param {string} text - Lower-cased user message.
 * @returns {boolean}
 */
function hasDocumentsFormingBalanceSignal(text) {
    if (hasAny(text, DOCUMENTS_FORMING_BALANCE_HINTS)) {
        return true;
    }
    // Every remaining rule needs both a balance word and an account reference.
    const mentionsBalance = text.includes("остат");
    const mentionsAccount = text.includes("счет") ||
        text.includes("счёт") ||
        hasAccountNumberAnchor(text) ||
        hasCompactAccountCodeToken(text);
    if (!mentionsBalance || !mentionsAccount) {
        return false;
    }
    if (text.includes("из чего состоит") || /из\s+чего\s+остат/u.test(text)) {
        return true;
    }
    const mentionsDocuments = /(?:документ|док(?:и|ам|ах|ов|а)?)/u.test(text);
    if (!mentionsDocuments) {
        return false;
    }
    const breakdownStems = ["формир", "раскрой", "раскид", "под остатк"];
    return breakdownStems.some((stem) => text.includes(stem));
}
/**
 * Check that the text names an account in some form: an explicit
 * "account/счет + number" anchor, the bare word "счет"/"счёт", or a compact
 * code such as "60.01" (accepted so slang prompts work without the word "счет").
 *
 * @param {string} text - Lower-cased user message.
 * @returns {boolean}
 */
function hasDocumentsFormingBalanceAccountAnchor(text) {
    const namesAccountExplicitly = hasAccountNumberAnchor(text) ||
        text.includes("счет") ||
        text.includes("счёт");
    return namesAccountExplicitly || hasCompactAccountCodeToken(text);
}
/**
 * Decide whether the text asks for an account balance.
 *
 * Any phrase from ACCOUNT_BALANCE_HINTS is sufficient on its own. Otherwise
 * the text needs a balance/quantity word together with an account reference
 * (explicit anchor, compact code, or "по NN[.NN]").
 *
 * @param {string} text - Lower-cased user message.
 * @returns {boolean}
 */
function hasAccountBalanceSignal(text) {
    if (hasAny(text, ACCOUNT_BALANCE_HINTS)) {
        return true;
    }
    const balanceWords = ["баланс", "остат", "сальд", "saldo", "balance", "скока", "сколько"];
    const mentionsBalance = balanceWords.some((word) => text.includes(word)) || /на\s+конец/u.test(text);
    if (!mentionsBalance) {
        return false;
    }
    if (hasAccountNumberAnchor(text) || hasCompactAccountCodeToken(text)) {
        return true;
    }
    // "по 60" / "по 60.01" followed by whitespace, punctuation or end of text.
    return /(?:^|\s)по\s+\d{2}(?:[.,]\d{1,2})?(?=$|[\s,.;:!?])/u.test(text);
}
/**
 * Decide whether the text asks for a list of open (unclosed) contracts.
 *
 * Requires both a contract word and an "open/unclosed" word. The signal is
 * withdrawn when the text names a specific contract number or uses
 * debt/open-items wording, because those belong to the open-items lane.
 *
 * @param {string} text - Lower-cased user message.
 * @returns {boolean}
 */
function hasOpenContractsListSignal(text) {
    const contractWords = ["договор", "contract", "dogovor"];
    const mentionsContract = contractWords.some((word) => text.includes(word));
    const mentionsOpenState = /(?:незакрыт|не\s+закрыт|открыт|open|unclosed)/iu.test(text);
    if (!(mentionsContract && mentionsOpenState)) {
        return false;
    }
    // A concrete contract number means "open items of that contract", not a list.
    const namesSpecificContract = hasContractNumberLikeToken(text);
    if (namesSpecificContract) {
        return false;
    }
    // Debt/tail vocabulary also points at open items rather than a contract list.
    const usesDebtWording = /(?:долг|задолж|хвост|позиц|open\s+items|unclosed\s+items|взаиморасчет|взаиморасчёт)/iu.test(text);
    return !usesDebtWording;
}
/**
 * Words that can never be a counterparty name: prepositions, document/bank
 * vocabulary, generic organisation nouns, year markers, politeness and filler.
 * Built once at module load instead of on every call.
 * Some entries (single letters, "доки?") cannot survive the length/charset
 * checks in the callers visible in this file; they are kept unchanged.
 */
const COUNTERPARTY_TOKEN_STOP_WORDS = new Set([
    "за", "с", "по", "на", "и", "или",
    "док", "доки", "доки?", "документ", "документы", "документов",
    "банк", "банковские", "операции", "платежи", "платеж", "платёж",
    "контрагент", "контрагенту", "контрагента",
    "компания", "компании", "организация", "организации",
    "год", "года", "г",
    "плс", "pls", "пж", "пжлст", "пожалуйста",
    "бля", "блять", "епт", "ёпт", "епта", "нах", "нахуй"
]);
/**
 * Heuristically decide whether a single token could be a counterparty name.
 *
 * The token is trimmed, lower-cased and stripped of leading/trailing ".", "_"
 * and "-" so that sentence punctuation ("документы.") cannot bypass the
 * stop-word list. A candidate must be at least two characters long, contain at
 * least one letter (which rejects plain numbers, dates like "01.02.2024" and
 * account codes like "60.01"), and must not be a stop word.
 *
 * @param {*} rawToken - Token to classify; null/undefined are treated as empty.
 * @returns {boolean} True when the token may name a counterparty.
 */
function isLikelyCounterpartyToken(rawToken) {
    const token = String(rawToken ?? "")
        .trim()
        .toLowerCase()
        .replace(/^[._-]+|[._-]+$/g, "");
    if (token.length < 2) {
        return false;
    }
    // Names contain letters; digit-only and digit/punctuation tokens are numbers, dates or codes.
    if (!/\p{L}/u.test(token)) {
        return false;
    }
    return !COUNTERPARTY_TOKEN_STOP_WORDS.has(token);
}
/**
 * Report whether the text contains a generic counterparty noun
 * (counterparty, company, organisation, supplier, customer, partner, ...).
 *
 * Matching is by substring, so stems cover inflected forms. Both "партнер" and
 * "партнёр" are accepted, in line with the е/ё handling used elsewhere in this
 * file (счет/счёт, платеж/платёж).
 * NOTE(review): the stem "контра" also occurs inside "контракт"; this looks
 * tolerated because contract rules run before counterparty rules — confirm.
 *
 * @param {string} text - Lower-cased user message.
 * @returns {boolean}
 */
function hasPartyAnchorMention(text) {
    const partyStems = [
        "контраг", "контра", "counterparty",
        "компан", "company",
        "организац",
        "supplier", "vendor", "customer", "client", "partner",
        "поставщик", "клиент", "покупател",
        "партнер", "партнёр"
    ];
    return partyStems.some((stem) => text.includes(stem));
}
/**
 * Report whether the text mentions a contract: "договор", "контракт",
 * "contract", "dogovor", or the abbreviation "дог" / "дог.".
 *
 * The abbreviation is matched with Unicode-aware lookarounds. JavaScript's
 * `\b` treats only ASCII letters, digits and "_" as word characters, so the
 * former `\bдог\.?\b` could never match a standalone Cyrillic "дог".
 *
 * @param {string} text - Lower-cased user message.
 * @returns {boolean}
 */
function hasContractAnchorMention(text) {
    const contractWords = ["договор", "контракт", "contract", "dogovor"];
    if (contractWords.some((word) => text.includes(word))) {
        return true;
    }
    // Kept for backward compatibility: "дог." anywhere in the text counts.
    if (text.includes("дог.")) {
        return true;
    }
    // Standalone "дог": no letter, digit or underscore on either side.
    return /(?<![\p{L}\p{N}_])дог\.?(?![\p{L}\p{N}_])/iu.test(text);
}
/**
 * Detect a token that looks like a contract number.
 *
 * Two routes lead to a positive answer:
 *  1. An explicit number marker: "№" or "#" followed by a token of at least
 *     two characters, or the Latin letter "n" followed by a token that
 *     contains a digit ("n 15-3", "n15/3"). Without the digit requirement any
 *     Latin word starting with "n" ("nestle", "new") was taken for a number.
 *  2. A purely numeric token with one or two separators ("19/15", "7-2023-1")
 *     that is neither an account code ("60.01") nor a date / year-month.
 *     Trailing punctuation is stripped first, so "19/15." at the end of a
 *     sentence is recognised as well.
 *
 * @param {string} text - Lower-cased user message.
 * @returns {boolean}
 */
function hasContractNumberLikeToken(text) {
    const explicitMarkerPattern = /(?:^|[\s([{])(?:[№#]\s*|n\s*(?=[a-zа-яё./_-]*\d))[a-zа-яё0-9][a-zа-яё0-9./_-]{1,}(?=$|[\s,.;:!?)\]}])/iu;
    if (explicitMarkerPattern.test(text)) {
        return true;
    }
    const isYear = (value) => value >= 1900 && value <= 2099;
    const isMonth = (value) => value >= 1 && value <= 12;
    const isDay = (value) => value >= 1 && value <= 31;
    const candidates = text
        .split(/[\s,;:!?()[\]{}"«»]+/u)
        // Drop leading junk (keeping # / №) and ALL trailing punctuation.
        .map((piece) => piece.replace(/^[^\p{L}\p{N}#№]+|[^\p{L}\p{N}]+$/gu, ""))
        .filter((piece) => piece.length > 0);
    for (const token of candidates) {
        // Only digit groups joined by ".", "/", "_" or "-" (two or three groups).
        if (!/^\d{1,6}[./_-]\d{1,6}(?:[./_-]\d{1,6})?$/u.test(token)) {
            continue;
        }
        // "60.01" / "51.00" is most likely an account code.
        if (/^\d{1,2}\.\d{1,2}$/u.test(token)) {
            continue;
        }
        const parts = token.split(/[./_-]/u).map(Number);
        if (parts.length === 2) {
            const [first, second] = parts;
            // Year-month in either order, e.g. "2024-05" or "05.2024".
            if ((isYear(first) && isMonth(second)) || (isYear(second) && isMonth(first))) {
                continue;
            }
        }
        else {
            const [first, second, third] = parts;
            const yearMonthDay = isYear(first) && isMonth(second) && isDay(third);
            const dayMonthYear = isYear(third) && isDay(first) && isMonth(second);
            if (yearMonthDay || dayMonthYear) {
                continue;
            }
        }
        return true;
    }
    return false;
}
/**
 * Decide whether the text is anchored to a contract.
 *
 * Either a contract word is present, or — for short follow-ups like "19/15" —
 * a contract-number-like token appears together with a document/bank signal.
 *
 * @param {string} text - Lower-cased user message.
 * @returns {boolean}
 */
function hasContractAnchorSignal(text) {
    const mentionsContractWord = hasContractAnchorMention(text);
    return mentionsContractWord
        ? true
        : hasContractNumberLikeToken(text) && hasDocsOrBankSignal(text);
}
/**
 * Generic nouns that may follow "по" without naming a concrete counterparty
 * (counterparty/company/contract/account/date/period/document words).
 * Built once at module load instead of on every call.
 */
const LOOSE_BY_ANCHOR_STOP_WORDS = new Set([
    "контрагенту", "контрагента", "контре",
    "компании", "компанию",
    "организации", "организацию",
    "поставщику", "поставщика",
    "клиенту", "клиента",
    "покупателю", "покупателя",
    "партнеру", "партнера",
    "договору", "договора",
    "счету", "счёту",
    "дате", "периоду", "период",
    "документам", "докам"
]);
/**
 * Detect a loose "по <name>" anchor, i.e. the preposition "по" followed by a
 * word that is not one of the generic nouns in LOOSE_BY_ANCHOR_STOP_WORDS.
 *
 * Every "по <word>" occurrence is examined, not only the first one, so
 * "по счету 60 по свк" is recognised. Trailing ".", "_" and "-" are removed
 * from the captured word before the stop-word lookup; otherwise sentence
 * punctuation ("по договору.") slipped past the stop list.
 *
 * @param {string} text - Lower-cased user message.
 * @returns {boolean}
 */
function hasLooseByAnchorMention(text) {
    const byPhrasePattern = /(?:^|\s)по\s+([a-zа-яё][a-zа-яё0-9._-]{1,})(?=[\s,.;:!?)]|$)/giu;
    for (const occurrence of text.matchAll(byPhrasePattern)) {
        const word = occurrence[1].toLowerCase().replace(/[._-]+$/u, "");
        // Keep the pattern's two-character minimum after punctuation is stripped.
        if (word.length >= 2 && !LOOSE_BY_ANCHOR_STOP_WORDS.has(word)) {
            return true;
        }
    }
    return false;
}
/**
 * Look for a counterparty-like word directly next to a document word:
 * "<name> доки" or "доки [по] <name>" (Russian and English document words).
 *
 * For each of the two patterns only the first occurrence is considered; the
 * captured neighbour is then judged by isLikelyCounterpartyToken.
 *
 * @param {string} text - Lower-cased user message.
 * @returns {boolean}
 */
function hasImplicitCounterpartyAnchorAroundDocs(text) {
    const neighbourPatterns = [
        // <name> immediately before the document word
        /(?:^|\s)([a-zа-яё][a-zа-яё0-9._-]{1,})\s+(?:док(?:и|ум(?:ент(?:ы|ов|ам|а)?)?)|docs?|documents?)(?=[\s,.;:!?)]|$)/iu,
        // <name> immediately after the document word, optionally via "по"
        /(?:док(?:и|ум(?:ент(?:ы|ов|ам|а)?)?)|docs?|documents?)\s+(?:по\s+)?([a-zа-яё][a-zа-яё0-9._-]{1,})(?=[\s,.;:!?)]|$)/iu
    ];
    return neighbourPatterns.some((pattern) => {
        const found = text.match(pattern);
        return found !== null && isLikelyCounterpartyToken(String(found[1] ?? ""));
    });
}
/**
 * Report whether the text mentions documents or bank activity
 * (document words, bank, statement, payment, transaction vocabulary).
 *
 * @param {string} text - User message; matching is case-insensitive.
 * @returns {boolean}
 */
function hasDocsOrBankSignal(text) {
    const docsOrBankPattern = /(?:док(?:и|умент|ументы|ументов)|docs?|documents?|банк|выписк|платеж|платёж|оплат|transactions?|bank\s+ops|bank\s+operations?)/iu;
    return docsOrBankPattern.test(text);
}
/**
 * Report whether the text contains any bank-operation vocabulary from the
 * core, by-counterparty or by-contract hint lists.
 *
 * @param {string} text - Lower-cased user message.
 * @returns {boolean}
 */
function hasBankOperationSignal(text) {
    const hintLists = [
        BANK_OPERATION_CORE_HINTS,
        BANK_OPERATIONS_BY_COUNTERPARTY_HINTS,
        BANK_OPERATIONS_BY_CONTRACT_HINTS
    ];
    return hintLists.some((hints) => hasAny(text, hints));
}
/**
 * Report whether the text contains a document word (Russian "док…" stems or
 * English "docs"/"documents"), using plain substring search.
 *
 * @param {string} text - Lower-cased user message.
 * @returns {boolean}
 */
function hasDocumentSignal(text) {
    const documentStems = ["док", "доки", "документ", "docs", "documents"];
    return documentStems.some((stem) => text.includes(stem));
}
/**
 * Last-resort counterparty detection: once the text carries a document or
 * bank signal, accept it if any token could be a counterparty name.
 *
 * A token qualifies when isLikelyCounterpartyToken approves it and it is
 * neither a bare two/four-digit number nor a filler word (за, for, from, to,
 * по, с, год, года, г, year).
 *
 * @param {string} text - Lower-cased user message.
 * @returns {boolean}
 */
function hasHeuristicCounterpartyAnchor(text) {
    const hasTopicSignal = hasDocsOrBankSignal(text) || hasBankOperationSignal(text);
    if (!hasTopicSignal) {
        return false;
    }
    const fillerWordPattern = /(?:^за$|^for$|^from$|^to$|^по$|^с$|^год$|^года$|^г$|^year$)/iu;
    const pieces = String(text ?? "")
        .split(/[^a-zа-яё0-9._-]+/iu)
        .map((piece) => piece.trim())
        .filter((piece) => piece.length > 0);
    return pieces.some((piece) => {
        const candidate = piece.toLowerCase();
        if (!isLikelyCounterpartyToken(candidate)) {
            return false;
        }
        const isShortNumber = /^\d{2}$/.test(candidate) || /^\d{4}$/.test(candidate);
        return !isShortNumber && !fillerWordPattern.test(candidate);
    });
}
/**
 * Report whether the text reads like a generic lookup request: the words
 * "есть", "покажи", "выведи", "какие", "что-то"/"что то"/"чтото", or an
 * "any period" / "all time" phrase.
 *
 * Whole-word matching uses Unicode-aware lookarounds. JavaScript's `\b` only
 * recognises ASCII word characters, so the previous `\bпокажи\b`-style
 * patterns could never match Cyrillic words in ordinary text.
 *
 * @param {string} text - User message; matching is case-insensitive.
 * @returns {boolean}
 */
function hasGenericAddressLookupSignal(text) {
    const lookupPatterns = [
        // Whole-word lookup verbs / pronouns (no letter, digit or "_" on either side).
        /(?<![\p{L}\p{N}_])(?:есть|покажи|выведи|какие|что(?:-|\s)?то)(?![\p{L}\p{N}_])/iu,
        /за\s+любой\s+период/iu,
        /за\s+вс[её]\s+время/iu,
        // Also covers "for all time".
        /all\s+time/iu
    ];
    return lookupPatterns.some((pattern) => pattern.test(text));
}
/**
 * Detect an explicit account reference: the word "account" / "счет" / "счёт"
 * followed within at most 12 non-digit characters by a two-digit account
 * number, optionally with a one- or two-digit sub-account ("счет 60.01").
 *
 * @param {string} text - User message; matching is case-insensitive.
 * @returns {boolean}
 */
function hasAccountNumberAnchor(text) {
    const accountAnchorPattern = /(?:account|сч[её]т|счет)\D{0,12}\d{2}(?:[.,]\d{1,2})?/i;
    return accountAnchorPattern.test(text);
}
/**
 * Classify a user message into one of the supported "address" intents.
 *
 * The message is stringified, trimmed and lower-cased, then checked against an
 * ordered list of rules; the first rule that applies wins, so the order of the
 * table below is part of the behaviour. When nothing applies the result is
 * `unknown` with low confidence.
 *
 * @param {*} userMessage - Raw user text; null/undefined are treated as empty.
 * @returns {{intent: string, confidence: string, reasons: string[]}}
 *          A fresh result object on every call.
 */
function resolveAddressIntent(userMessage) {
    const text = String(userMessage ?? "").trim().toLowerCase();
    const mentions = (...needles) => needles.some((needle) => text.includes(needle));
    // [intent, confidence, reason, predicate] — evaluated lazily, top to bottom.
    const rules = [
        ["list_receivables_counterparties", "high", "receivables_signal_detected",
            () => hasAny(text, RECEIVABLES_STRONG)],
        ["list_payables_counterparties", "high", "payables_signal_detected",
            () => hasAny(text, PAYABLES_STRONG)],
        ["documents_forming_balance", "high", "documents_forming_balance_signal_detected",
            () => hasDocumentsFormingBalanceSignal(text) && hasDocumentsFormingBalanceAccountAnchor(text)],
        ["list_open_contracts", "medium", "open_contract_signal_detected",
            () => hasOpenContractsListSignal(text)],
        ["open_items_by_counterparty_or_contract", "medium", "open_items_signal_detected",
            () => hasAny(text, OPEN_ITEMS_HINTS) &&
                mentions("контраг", "договор", "counterparty", "contract")],
        // Contract-scoped rules run before counterparty-scoped ones.
        ["bank_operations_by_contract", "medium", "bank_ops_by_contract_signal_detected",
            () => hasContractAnchorSignal(text) && hasBankOperationSignal(text)],
        ["list_documents_by_contract", "medium", "documents_by_contract_signal_detected",
            () => hasContractAnchorSignal(text) &&
                (hasAny(text, DOCUMENTS_BY_CONTRACT_HINTS) || hasDocumentSignal(text))],
        ["bank_operations_by_counterparty", "medium", "bank_ops_by_counterparty_signal_detected",
            () => hasAny(text, BANK_OPERATIONS_BY_COUNTERPARTY_HINTS) &&
                (hasPartyAnchorMention(text) ||
                    hasLooseByAnchorMention(text) ||
                    hasHeuristicCounterpartyAnchor(text))],
        ["list_documents_by_counterparty", "medium", "documents_by_counterparty_signal_detected",
            () => hasAny(text, DOCUMENTS_BY_COUNTERPARTY_HINTS) &&
                (hasPartyAnchorMention(text) ||
                    hasLooseByAnchorMention(text) ||
                    hasImplicitCounterpartyAnchorAroundDocs(text) ||
                    hasHeuristicCounterpartyAnchor(text))],
        ["account_balance_snapshot", "high", "account_balance_signal_detected",
            () => hasAccountBalanceSignal(text)],
        // Low-confidence fallback: "show / what is there ... по <name>".
        ["list_documents_by_counterparty", "low", "generic_lookup_with_loose_anchor_fallback",
            () => hasLooseByAnchorMention(text) && hasGenericAddressLookupSignal(text)],
        // Final catch for contract mentions that none of the rules above claimed.
        ["list_open_contracts", "medium", "open_contract_signal_detected",
            () => hasAny(text, OPEN_CONTRACTS_HINTS) && mentions("договор", "contract")]
    ];
    for (const [intent, confidence, reason, applies] of rules) {
        if (applies()) {
            return { intent, confidence, reasons: [reason] };
        }
    }
    return {
        intent: "unknown",
        confidence: "low",
        reasons: ["intent_not_supported_in_v1"]
    };
}