NODEDC_1C/llm_normalizer/backend/dist/services/addressQueryClassifier.js

192 lines
4.8 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"use strict";
// Tag the exports object with `__esModule`, the marker that ES-module interop
// helpers look for on transpiled CommonJS modules.
Object.defineProperty(exports, "__esModule", { value: true });
// Public API of this module. The assignment works before the function body
// appears because function declarations are hoisted.
exports.detectAddressQuestionMode = detectAddressQuestionMode;
// Fragments that detectAddressQuestionMode() treats as an "action" signal.
// They are looked up with hasAnyToken(), i.e. as plain substrings of the
// lowercased message, so truncated entries such as "задолж" or "незакрыт"
// also hit longer word forms that contain them.
const ADDRESS_ACTION_TOKENS = [
    // English
    "show", "list", "find", "get", "lookup", "open",
    "balance", "debt", "owe",
    // Russian
    "покажи", "список", "найди", "выведи",
    "кто", "кому", "какие",
    "остаток", "долг", "задолж", "хвост", "незакрыт"
];
// Fragments that detectAddressQuestionMode() treats as an "entity" signal
// (counterparties, contracts, accounts, documents, debts and the like).
// Matching is by plain substring via hasAnyToken(), so many Russian entries
// are truncated ("компан", "организац", "выписк") and a short entry such as
// "док" also matches any longer word that contains it.
const ADDRESS_ENTITY_TOKENS = [
    // English
    "counterparty", "counterparties", "company", "organization",
    "supplier", "vendor", "customer", "client", "partner",
    "contract", "contracts", "account", "accounts",
    "document", "documents", "balance",
    "payable", "payables", "receivable", "receivables",
    "owe", "owes", "owed",
    // Russian
    "контрагент", "контра", "компан", "организац",
    "поставщик", "клиент", "покупател", "партнер",
    "банк", "выписк", "операц",
    "договор", "счет", "счёт",
    "документ", "доки", "док",
    "остаток", "дебитор", "кредитор", "аванс", "оплат",
    "долг", "должен", "должны", "должна"
];
// Fragments that make detectAddressQuestionMode() answer "deep_analysis"
// instead of "address_query". Looked up as plain substrings via hasAnyToken();
// "root cause" is therefore matched as a two-word phrase with a single space.
const DEEP_REASONING_TOKENS = [
    // English
    "why", "because", "root cause", "mechanism", "prove", "chain",
    // Russian
    "почему", "причин", "механизм", "докажи", "цепоч", "разрыв", "ошибк"
];
// Generic words that may follow "по" without naming a concrete anchor
// ("по контрагенту", "по договору", "по периоду", ...).
const LOOSE_BY_ANCHOR_STOP_WORDS = new Set([
    "контрагенту",
    "контрагента",
    "контре",
    "компании",
    "компанию",
    "организации",
    "организацию",
    "поставщику",
    "поставщика",
    "клиенту",
    "клиента",
    "покупателю",
    "покупателя",
    "партнеру",
    "партнера",
    "договору",
    "договора",
    "счету",
    "счёту",
    "дате",
    "периоду",
    "период",
    "документам",
    "докам",
    "взаиморасчетам",
    "взаиморасчётам"
]);
// "по <token>": "по" must start the text or follow whitespace; the token starts
// with a Latin/Cyrillic letter, is at least two characters long and must be
// followed by whitespace, closing punctuation or the end of the text.
const LOOSE_BY_ANCHOR_PATTERN = /(?:^|\s)по\s+([a-zа-яё][a-zа-яё0-9._-]{1,})(?=[\s,.;:!?)]|$)/iu;
/**
 * Reports whether the text contains a "по <something>" mention whose target is
 * not one of the generic stop words.
 *
 * Only the first "по <token>" occurrence in the text is inspected.
 *
 * @param {string} text - Message text to scan.
 * @returns {boolean} true when the first "по <token>" token is not a stop word;
 *   false when there is no such mention or the token is a stop word.
 */
function hasLooseByAnchorMention(text) {
    const match = text.match(LOOSE_BY_ANCHOR_PATTERN);
    if (!match) {
        return false;
    }
    // The token class allows ".", "_" and "-" so names like "ooo-test.ru" stay
    // intact, but that also swallows sentence punctuation ("по договору.").
    // Drop trailing separators so such tokens are still recognised as stop words.
    const token = match[1].toLowerCase().replace(/[._-]+$/u, "");
    return !LOOSE_BY_ANCHOR_STOP_WORDS.has(token);
}
/**
 * Reports whether the text contains a follow-up style phrase: either an
 * "all time / any period" qualifier or an existence question
 * ("есть ли", "есть что-то", "что есть").
 *
 * @param {string} text - Message text to scan.
 * @returns {boolean} true when one of the phrases is present.
 */
function hasAddressFollowupSignal(text) {
    const anyPeriodPhrase = /(?:за\s+любой\s+период|за\s+вс[её]\s+время|for\s+all\s+time|all\s+time)/iu;
    if (anyPeriodPhrase.test(text)) {
        return true;
    }
    // JavaScript's \b only knows ASCII word characters, even with the "u" flag,
    // so it never fires between whitespace and a Cyrillic letter. Use explicit
    // Unicode-aware lookarounds to get real word boundaries for Russian text.
    const existencePhrase = /(?<![\p{L}\p{N}_])(?:есть\s+что(?:-|\s)?то|есть\s+ли|что\s+есть)(?![\p{L}\p{N}_])/iu;
    return existencePhrase.test(text);
}
/**
 * Reports whether at least one of `tokens` occurs in `text`.
 *
 * Matching is a plain substring test (String.prototype.includes), so a token
 * also matches inside a longer word.
 *
 * @param {string} text - Text to search in.
 * @param {string[]} tokens - Candidate substrings.
 * @returns {boolean} true when any token is found; false for an empty list.
 */
function hasAnyToken(text, tokens) {
    const isAbsent = (needle) => !text.includes(needle);
    // "Not every token is absent" is the same as "some token is present".
    return !tokens.every(isAbsent);
}
/**
 * Classifies a user message as an address-style lookup ("address_query"),
 * a reasoning question ("deep_analysis") or neither ("unsupported").
 *
 * The message is stringified, trimmed and lowercased before any matching.
 * A deep-reasoning token always wins over address signals. Otherwise:
 *   - action + entity tokens                           -> address_query / high
 *   - "по <anchor>" plus action, entity or follow-up   -> address_query / medium
 *   - entity token alone                               -> address_query / medium
 *
 * @param {*} userMessage - Raw user message; null/undefined count as empty.
 * @returns {{mode: string, confidence: string, reasons: string[]}} Verdict with
 *   the reason codes that led to it.
 */
function detectAddressQuestionMode(userMessage) {
    const normalized = String(userMessage ?? "").trim().toLowerCase();
    if (normalized.length === 0) {
        return { mode: "unsupported", confidence: "low", reasons: ["empty_message"] };
    }
    // Every address branch requires the absence of a deep-reasoning token,
    // so that case can be settled up front.
    if (hasAnyToken(normalized, DEEP_REASONING_TOKENS)) {
        return {
            mode: "deep_analysis",
            confidence: "high",
            reasons: ["deep_reasoning_signal_detected"]
        };
    }
    const mentionsAction = hasAnyToken(normalized, ADDRESS_ACTION_TOKENS);
    const mentionsEntity = hasAnyToken(normalized, ADDRESS_ENTITY_TOKENS);
    if (mentionsAction && mentionsEntity) {
        return {
            mode: "address_query",
            confidence: "high",
            reasons: ["address_action_detected", "address_entity_detected"]
        };
    }
    if (hasLooseByAnchorMention(normalized)) {
        const isFollowup = hasAddressFollowupSignal(normalized);
        if (mentionsAction || mentionsEntity || isFollowup) {
            const reasons = ["loose_by_anchor_detected"];
            if (isFollowup) {
                reasons.push("address_followup_signal_detected");
            }
            return { mode: "address_query", confidence: "medium", reasons };
        }
    }
    if (mentionsEntity) {
        return {
            mode: "address_query",
            confidence: "medium",
            reasons: ["address_entity_detected"]
        };
    }
    return {
        mode: "unsupported",
        confidence: "low",
        reasons: ["no_address_or_deep_signal"]
    };
}